Search in sources :

Example 1 with HashMultiset

Use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.HashMultiset in the Apache Beam project.

Class FlinkStreamingPortablePipelineTranslator, method translateFlatten.

/**
 * Translates the flatten transform identified by {@code id} into a Flink {@code DataStream} and
 * registers that stream with {@code context} under the transform's single output.
 *
 * <p>With at least one input, the registered stream is the union of all input streams. With no
 * inputs, a stream that never emits an element is registered instead.
 *
 * @param id identifier used to look up the transform in the pipeline components
 * @param pipeline pipeline proto holding the transform definition
 * @param context translation context used to look up input streams and register the output
 * @param <T> type parameter of the streams being combined
 */
private <T> void translateFlatten(String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
    final RunnerApi.PTransform flatten = pipeline.getComponents().getTransformsOrThrow(id);
    final Map<String, String> inputsByTag = flatten.getInputsMap();

    if (!inputsByTag.isEmpty()) {
        // A stream that is fed into the flatten more than once has to be made unique first,
        // because Flink seems to swallow watermarks when a stream is unioned with itself.
        // Start by counting how often every input stream occurs.
        final HashMultiset<DataStream<T>> occurrences = HashMultiset.create();
        for (String pCollectionId : inputsByTag.values()) {
            DataStream<T> stream = context.getDataStreamOrThrow(pCollectionId);
            occurrences.add(stream);
        }

        DataStream<T> unioned = null;
        for (String pCollectionId : inputsByTag.values()) {
            DataStream<T> branch = context.getDataStreamOrThrow(pCollectionId);
            if (occurrences.count(branch) > 1) {
                // Route the repeated input through an identity flatMap so that every
                // occurrence shows up in the union as a distinct stream.
                branch = branch.flatMap(new FlatMapFunction<T, T>() {

                    private static final long serialVersionUID = 1L;

                    @Override
                    public void flatMap(T element, Collector<T> out) {
                        out.collect(element);
                    }
                });
            }
            if (unioned == null) {
                unioned = branch;
            } else {
                unioned = unioned.union(branch);
            }
        }

        context.addDataStream(Iterables.getOnlyElement(flatten.getOutputsMap().values()), unioned);
        return;
    }

    // No inputs at all: downstream operations still need a stream to consume. Flink offers no
    // empty source, so start from an impulse source and attach a flatMap that drops its
    // single element.
    final long idleShutdownMs = context.getPipelineOptions().getShutdownSourcesAfterIdleMs();
    final DataStreamSource<WindowedValue<byte[]>> impulse =
            context.getExecutionEnvironment().addSource(new ImpulseSourceFunction(idleShutdownMs));
    final DataStream<WindowedValue<T>> emptyStream =
            impulse
                    .<WindowedValue<T>>flatMap((ignoredElement, ignoredOut) -> {
                        // intentionally emits nothing
                    })
                    .returns(
                            new CoderTypeInformation<>(
                                    WindowedValue.getFullCoder((Coder<T>) VoidCoder.of(), GlobalWindow.Coder.INSTANCE),
                                    context.getPipelineOptions()));
    context.addDataStream(Iterables.getOnlyElement(flatten.getOutputsMap().values()), emptyStream);
}
Also used : SingletonKeyedWorkItemCoder(org.apache.beam.runners.flink.translation.wrappers.streaming.SingletonKeyedWorkItemCoder) WindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder) FlinkExecutableStageContextFactory(org.apache.beam.runners.flink.translation.functions.FlinkExecutableStageContextFactory) CoderUtils(org.apache.beam.sdk.util.CoderUtils) WireCoders(org.apache.beam.runners.fnexecution.wire.WireCoders) UnboundedSource(org.apache.beam.sdk.io.UnboundedSource) PCollectionViews(org.apache.beam.sdk.values.PCollectionViews) SdkHarnessClient(org.apache.beam.runners.fnexecution.control.SdkHarnessClient) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) RunnerPCollectionView(org.apache.beam.runners.core.construction.RunnerPCollectionView) ImmutableSet(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableSet) TestStreamSource(org.apache.beam.runners.flink.translation.wrappers.streaming.io.TestStreamSource) Map(java.util.Map) TestStreamTranslation(org.apache.beam.runners.core.construction.TestStreamTranslation) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) JsonNode(com.fasterxml.jackson.databind.JsonNode) CoderTypeInformation(org.apache.beam.runners.flink.translation.types.CoderTypeInformation) KvCoder(org.apache.beam.sdk.coders.KvCoder) PTransformTranslation(org.apache.beam.runners.core.construction.PTransformTranslation) WindowDoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.WindowDoFnOperator) Set(java.util.Set) OutputTag(org.apache.flink.util.OutputTag) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) ExecutableStageTranslation.generateNameFromStagePayload(org.apache.beam.runners.core.construction.ExecutableStageTranslation.generateNameFromStagePayload) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) CoderException(org.apache.beam.sdk.coders.CoderException) 
WindowingStrategyTranslation(org.apache.beam.runners.core.construction.WindowingStrategyTranslation) TestStream(org.apache.beam.sdk.testing.TestStream) PipelineTranslatorUtils.instantiateCoder(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.instantiateCoder) ValueWithRecordId(org.apache.beam.sdk.values.ValueWithRecordId) KV(org.apache.beam.sdk.values.KV) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) StreamingImpulseSource(org.apache.beam.runners.flink.translation.wrappers.streaming.io.StreamingImpulseSource) RichMapFunction(org.apache.flink.api.common.functions.RichMapFunction) Collector(org.apache.flink.util.Collector) TupleTag(org.apache.beam.sdk.values.TupleTag) Maps(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Maps) BiMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.BiMap) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) QueryablePipeline(org.apache.beam.runners.core.construction.graph.QueryablePipeline) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) SingleOutputStreamOperator(org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator) IOException(java.io.IOException) DedupingOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.io.DedupingOperator) BoundedSource(org.apache.beam.sdk.io.BoundedSource) TreeMap(java.util.TreeMap) PCollectionView(org.apache.beam.sdk.values.PCollectionView) AutoService(com.google.auto.service.AutoService) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) PipelineNode(org.apache.beam.runners.core.construction.graph.PipelineNode) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) UnboundedSourceWrapper(org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper) 
FileSystems(org.apache.beam.sdk.io.FileSystems) SystemReduceFn(org.apache.beam.runners.core.SystemReduceFn) SerializablePipelineOptions(org.apache.beam.runners.core.construction.SerializablePipelineOptions) WindowedValue(org.apache.beam.sdk.util.WindowedValue) PipelineTranslatorUtils.getWindowingStrategy(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.getWindowingStrategy) WorkItemKeySelector(org.apache.beam.runners.flink.translation.wrappers.streaming.WorkItemKeySelector) KvToByteBufferKeySelector(org.apache.beam.runners.flink.translation.wrappers.streaming.KvToByteBufferKeySelector) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) DoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator) ByteBuffer(java.nio.ByteBuffer) Sets(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets) Locale(java.util.Locale) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) KeyedWorkItem(org.apache.beam.runners.core.KeyedWorkItem) KeySelector(org.apache.flink.api.java.functions.KeySelector) TwoInputTransformation(org.apache.flink.streaming.api.transformations.TwoInputTransformation) KeyedStream(org.apache.flink.streaming.api.datastream.KeyedStream) String.format(java.lang.String.format) ModelCoders(org.apache.beam.runners.core.construction.ModelCoders) UnionCoder(org.apache.beam.sdk.transforms.join.UnionCoder) JobExecutionResult(org.apache.flink.api.common.JobExecutionResult) List(java.util.List) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment) 
HashMultiset(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.HashMultiset) PipelineTranslatorUtils.createOutputMap(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.createOutputMap) ReadTranslation(org.apache.beam.runners.core.construction.ReadTranslation) ExecutableStageDoFnOperator(org.apache.beam.runners.flink.translation.wrappers.streaming.ExecutableStageDoFnOperator) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) DataStreamSource(org.apache.flink.streaming.api.datastream.DataStreamSource) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) ImpulseSourceFunction(org.apache.beam.runners.flink.translation.functions.ImpulseSourceFunction) ViewFn(org.apache.beam.sdk.transforms.ViewFn) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) SdfByteBufferKeySelector(org.apache.beam.runners.flink.translation.wrappers.streaming.SdfByteBufferKeySelector) NativeTransforms(org.apache.beam.runners.core.construction.NativeTransforms) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Configuration(org.apache.flink.configuration.Configuration) Lists(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists) DataStream(org.apache.flink.streaming.api.datastream.DataStream) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) SourceInputFormat(org.apache.beam.runners.flink.translation.wrappers.SourceInputFormat) Collections(java.util.Collections) DataStream(org.apache.flink.streaming.api.datastream.DataStream) ImpulseSourceFunction(org.apache.beam.runners.flink.translation.functions.ImpulseSourceFunction) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) WindowedValue(org.apache.beam.sdk.util.WindowedValue) FlatMapFunction(org.apache.flink.api.common.functions.FlatMapFunction) Collector(org.apache.flink.util.Collector)

Aggregations

JsonNode (com.fasterxml.jackson.databind.JsonNode)1 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 AutoService (com.google.auto.service.AutoService)1 IOException (java.io.IOException)1 String.format (java.lang.String.format)1 ByteBuffer (java.nio.ByteBuffer)1 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 List (java.util.List)1 Locale (java.util.Locale)1 Map (java.util.Map)1 Set (java.util.Set)1 TreeMap (java.util.TreeMap)1 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)1 KeyedWorkItem (org.apache.beam.runners.core.KeyedWorkItem)1 SystemReduceFn (org.apache.beam.runners.core.SystemReduceFn)1 ExecutableStageTranslation.generateNameFromStagePayload (org.apache.beam.runners.core.construction.ExecutableStageTranslation.generateNameFromStagePayload)1 ModelCoders (org.apache.beam.runners.core.construction.ModelCoders)1