Search in sources :

Example 31 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

the class ProcessBundleDescriptorsTest method testLengthPrefixingOfInputCoderExecutableStage.

@Test
public void testLengthPrefixingOfInputCoderExecutableStage() throws Exception {
    Pipeline p = Pipeline.create();
    Coder<Void> voidCoder = VoidCoder.of();
    assertThat(ModelCoderRegistrar.isKnownCoder(voidCoder), is(false));
    p.apply("impulse", Impulse.create()).apply(ParDo.of(new DoFn<byte[], Void>() {

        @ProcessElement
        public void process(ProcessContext ctxt) {
        }
    })).setCoder(voidCoder).apply(ParDo.of(new DoFn<Void, Void>() {

        @ProcessElement
        public void processElement(ProcessContext context, RestrictionTracker<Void, Void> tracker) {
        }

        @GetInitialRestriction
        public Void getInitialRestriction() {
            return null;
        }

        @NewTracker
        public SomeTracker newTracker(@Restriction Void restriction) {
            return null;
        }
    })).setCoder(voidCoder);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
    RunnerApi.Pipeline pipelineWithSdfExpanded = ProtoOverrides.updateTransform(PTransformTranslation.PAR_DO_TRANSFORM_URN, pipelineProto, SplittableParDoExpander.createSizedReplacement());
    FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineWithSdfExpanded);
    Optional<ExecutableStage> optionalStage = Iterables.tryFind(fused.getFusedStages(), (ExecutableStage stage) -> stage.getTransforms().stream().anyMatch(transform -> transform.getTransform().getSpec().getUrn().equals(PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN)));
    checkState(optionalStage.isPresent(), "Expected a stage with SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN.");
    ExecutableStage stage = optionalStage.get();
    PipelineNode.PCollectionNode inputPCollection = stage.getInputPCollection();
    Map<String, RunnerApi.Coder> stageCoderMap = stage.getComponents().getCodersMap();
    RunnerApi.Coder originalMainInputCoder = stageCoderMap.get(inputPCollection.getPCollection().getCoderId());
    BeamFnApi.ProcessBundleDescriptor pbd = ProcessBundleDescriptors.fromExecutableStage("test_stage", stage, Endpoints.ApiServiceDescriptor.getDefaultInstance()).getProcessBundleDescriptor();
    Map<String, RunnerApi.Coder> pbsCoderMap = pbd.getCodersMap();
    RunnerApi.Coder pbsMainInputCoder = pbsCoderMap.get(pbd.getPcollectionsOrThrow(inputPCollection.getId()).getCoderId());
    RunnerApi.Coder kvCoder = pbsCoderMap.get(ModelCoders.getKvCoderComponents(pbsMainInputCoder).keyCoderId());
    RunnerApi.Coder keyCoder = pbsCoderMap.get(ModelCoders.getKvCoderComponents(kvCoder).keyCoderId());
    RunnerApi.Coder valueKvCoder = pbsCoderMap.get(ModelCoders.getKvCoderComponents(kvCoder).valueCoderId());
    RunnerApi.Coder valueCoder = pbsCoderMap.get(ModelCoders.getKvCoderComponents(valueKvCoder).keyCoderId());
    RunnerApi.Coder originalKvCoder = stageCoderMap.get(ModelCoders.getKvCoderComponents(originalMainInputCoder).keyCoderId());
    RunnerApi.Coder originalKeyCoder = stageCoderMap.get(ModelCoders.getKvCoderComponents(originalKvCoder).keyCoderId());
    RunnerApi.Coder originalvalueKvCoder = stageCoderMap.get(ModelCoders.getKvCoderComponents(originalKvCoder).valueCoderId());
    RunnerApi.Coder originalvalueCoder = stageCoderMap.get(ModelCoders.getKvCoderComponents(originalvalueKvCoder).keyCoderId());
    ensureLengthPrefixed(keyCoder, originalKeyCoder, pbsCoderMap);
    ensureLengthPrefixed(valueCoder, originalvalueCoder, pbsCoderMap);
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) Endpoints(org.apache.beam.model.pipeline.v1.Endpoints) StateSpec(org.apache.beam.sdk.state.StateSpec) KV(org.apache.beam.sdk.values.KV) CoderTranslation(org.apache.beam.runners.core.construction.CoderTranslation) TimerSpecs(org.apache.beam.sdk.state.TimerSpecs) Coder(org.apache.beam.sdk.coders.Coder) Impulse(org.apache.beam.sdk.transforms.Impulse) GreedyPipelineFuser(org.apache.beam.runners.core.construction.graph.GreedyPipelineFuser) PipelineTranslation(org.apache.beam.runners.core.construction.PipelineTranslation) Optional(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Optional) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) TimerSpec(org.apache.beam.sdk.state.TimerSpec) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) TimerReference(org.apache.beam.runners.core.construction.graph.TimerReference) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) ModelCoderRegistrar(org.apache.beam.runners.core.construction.ModelCoderRegistrar) Pipeline(org.apache.beam.sdk.Pipeline) ProcessContext(org.apache.beam.sdk.transforms.DoFn.ProcessContext) RestrictionTracker(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) DoFn(org.apache.beam.sdk.transforms.DoFn) KvCoder(org.apache.beam.sdk.coders.KvCoder) GroupByKey(org.apache.beam.sdk.transforms.GroupByKey) PTransformTranslation(org.apache.beam.runners.core.construction.PTransformTranslation) ProcessElement(org.apache.beam.sdk.transforms.DoFn.ProcessElement) Test(org.junit.Test) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) ModelCoders(org.apache.beam.runners.core.construction.ModelCoders) Serializable(java.io.Serializable) SplittableParDoExpander(org.apache.beam.runners.core.construction.graph.SplittableParDoExpander) BagState(org.apache.beam.sdk.state.BagState) StateSpecs(org.apache.beam.sdk.state.StateSpecs) ParDo(org.apache.beam.sdk.transforms.ParDo) ProtoOverrides(org.apache.beam.runners.core.construction.graph.ProtoOverrides) Timer(org.apache.beam.sdk.state.Timer) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) PipelineNode(org.apache.beam.runners.core.construction.graph.PipelineNode) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) TimeDomain(org.apache.beam.sdk.state.TimeDomain) ProcessContext(org.apache.beam.sdk.transforms.DoFn.ProcessContext) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) KvCoder(org.apache.beam.sdk.coders.KvCoder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) PipelineNode(org.apache.beam.runners.core.construction.graph.PipelineNode) Pipeline(org.apache.beam.sdk.Pipeline) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) ProcessElement(org.apache.beam.sdk.transforms.DoFn.ProcessElement) Test(org.junit.Test)

Example 32 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

the class RemoteExecutionTest method testExecutionWithSideInput.

@Test
public void testExecutionWithSideInput() throws Exception {
    launchSdkHarness(PipelineOptionsFactory.create());
    Pipeline p = Pipeline.create();
    addExperiment(p.getOptions().as(ExperimentalOptions.class), "beam_fn_api");
    // TODO(BEAM-10097): Remove experiment once all portable runners support this view type
    addExperiment(p.getOptions().as(ExperimentalOptions.class), "use_runner_v2");
    PCollection<String> input = p.apply("impulse", Impulse.create()).apply("create", ParDo.of(new DoFn<byte[], String>() {

        @ProcessElement
        public void process(ProcessContext ctxt) {
            ctxt.output("zero");
            ctxt.output("one");
            ctxt.output("two");
        }
    })).setCoder(StringUtf8Coder.of());
    PCollectionView<Iterable<String>> iterableView = input.apply("createIterableSideInput", View.asIterable());
    PCollectionView<Map<String, Iterable<String>>> multimapView = input.apply(WithKeys.of("key")).apply("createMultimapSideInput", View.asMultimap());
    input.apply("readSideInput", ParDo.of(new DoFn<String, KV<String, String>>() {

        @ProcessElement
        public void processElement(ProcessContext context) {
            for (String value : context.sideInput(iterableView)) {
                context.output(KV.of(context.element(), value));
            }
            for (Map.Entry<String, Iterable<String>> entry : context.sideInput(multimapView).entrySet()) {
                for (String value : entry.getValue()) {
                    context.output(KV.of(context.element(), entry.getKey() + ":" + value));
                }
            }
        }
    }).withSideInputs(iterableView, multimapView)).setCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())).apply("gbk", GroupByKey.create());
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
    FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineProto);
    Optional<ExecutableStage> optionalStage = Iterables.tryFind(fused.getFusedStages(), (ExecutableStage stage) -> !stage.getSideInputs().isEmpty());
    checkState(optionalStage.isPresent(), "Expected a stage with side inputs.");
    ExecutableStage stage = optionalStage.get();
    ExecutableProcessBundleDescriptor descriptor = ProcessBundleDescriptors.fromExecutableStage("test_stage", stage, dataServer.getApiServiceDescriptor(), stateServer.getApiServiceDescriptor());
    BundleProcessor processor = controlClient.getProcessor(descriptor.getProcessBundleDescriptor(), descriptor.getRemoteInputDestinations(), stateDelegator);
    Map<String, Coder> remoteOutputCoders = descriptor.getRemoteOutputCoders();
    Map<String, Collection<WindowedValue<?>>> outputValues = new HashMap<>();
    Map<String, RemoteOutputReceiver<?>> outputReceivers = new HashMap<>();
    for (Entry<String, Coder> remoteOutputCoder : remoteOutputCoders.entrySet()) {
        List<WindowedValue<?>> outputContents = Collections.synchronizedList(new ArrayList<>());
        outputValues.put(remoteOutputCoder.getKey(), outputContents);
        outputReceivers.put(remoteOutputCoder.getKey(), RemoteOutputReceiver.of((Coder<WindowedValue<?>>) remoteOutputCoder.getValue(), outputContents::add));
    }
    StateRequestHandler stateRequestHandler = StateRequestHandlers.forSideInputHandlerFactory(descriptor.getSideInputSpecs(), new SideInputHandlerFactory() {

        @Override
        public <V, W extends BoundedWindow> IterableSideInputHandler<V, W> forIterableSideInput(String pTransformId, String sideInputId, Coder<V> elementCoder, Coder<W> windowCoder) {
            return new IterableSideInputHandler<V, W>() {

                @Override
                public Iterable<V> get(W window) {
                    return (Iterable) Arrays.asList("A", "B", "C");
                }

                @Override
                public Coder<V> elementCoder() {
                    return elementCoder;
                }
            };
        }

        @Override
        public <K, V, W extends BoundedWindow> MultimapSideInputHandler<K, V, W> forMultimapSideInput(String pTransformId, String sideInputId, KvCoder<K, V> elementCoder, Coder<W> windowCoder) {
            return new MultimapSideInputHandler<K, V, W>() {

                @Override
                public Iterable<K> get(W window) {
                    return (Iterable) Arrays.asList("key1", "key2");
                }

                @Override
                public Iterable<V> get(K key, W window) {
                    if ("key1".equals(key)) {
                        return (Iterable) Arrays.asList("H", "I", "J");
                    } else if ("key2".equals(key)) {
                        return (Iterable) Arrays.asList("M", "N", "O");
                    }
                    return Collections.emptyList();
                }

                @Override
                public Coder<K> keyCoder() {
                    return elementCoder.getKeyCoder();
                }

                @Override
                public Coder<V> valueCoder() {
                    return elementCoder.getValueCoder();
                }
            };
        }
    });
    BundleProgressHandler progressHandler = BundleProgressHandler.ignored();
    try (RemoteBundle bundle = processor.newBundle(outputReceivers, stateRequestHandler, progressHandler)) {
        Iterables.getOnlyElement(bundle.getInputReceivers().values()).accept(valueInGlobalWindow("X"));
        Iterables.getOnlyElement(bundle.getInputReceivers().values()).accept(valueInGlobalWindow("Y"));
    }
    for (Collection<WindowedValue<?>> windowedValues : outputValues.values()) {
        assertThat(windowedValues, containsInAnyOrder(valueInGlobalWindow(KV.of("X", "A")), valueInGlobalWindow(KV.of("X", "B")), valueInGlobalWindow(KV.of("X", "C")), valueInGlobalWindow(KV.of("X", "key1:H")), valueInGlobalWindow(KV.of("X", "key1:I")), valueInGlobalWindow(KV.of("X", "key1:J")), valueInGlobalWindow(KV.of("X", "key2:M")), valueInGlobalWindow(KV.of("X", "key2:N")), valueInGlobalWindow(KV.of("X", "key2:O")), valueInGlobalWindow(KV.of("Y", "A")), valueInGlobalWindow(KV.of("Y", "B")), valueInGlobalWindow(KV.of("Y", "C")), valueInGlobalWindow(KV.of("Y", "key1:H")), valueInGlobalWindow(KV.of("Y", "key1:I")), valueInGlobalWindow(KV.of("Y", "key1:J")), valueInGlobalWindow(KV.of("Y", "key2:M")), valueInGlobalWindow(KV.of("Y", "key2:N")), valueInGlobalWindow(KV.of("Y", "key2:O"))));
    }
}
Also used : StateRequestHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandler) IsEmptyIterable(org.hamcrest.collection.IsEmptyIterable) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Entry(java.util.Map.Entry) BundleProcessor(org.apache.beam.runners.fnexecution.control.SdkHarnessClient.BundleProcessor) WindowedValue(org.apache.beam.sdk.util.WindowedValue) KV(org.apache.beam.sdk.values.KV) SideInputHandlerFactory(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.SideInputHandlerFactory) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) ExecutableProcessBundleDescriptor(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors.ExecutableProcessBundleDescriptor) KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) BigEndianLongCoder(org.apache.beam.sdk.coders.BigEndianLongCoder) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) IterableSideInputHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.IterableSideInputHandler) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) MultimapSideInputHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.MultimapSideInputHandler) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) Test(org.junit.Test)

Example 33 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

the class RemoteExecutionTest method testExecutionWithSideInputCaching.

@Test
public void testExecutionWithSideInputCaching() throws Exception {
    Pipeline p = Pipeline.create();
    addExperiment(p.getOptions().as(ExperimentalOptions.class), "beam_fn_api");
    // TODO(BEAM-10097): Remove experiment once all portable runners support this view type
    addExperiment(p.getOptions().as(ExperimentalOptions.class), "use_runner_v2");
    launchSdkHarness(p.getOptions());
    PCollection<String> input = p.apply("impulse", Impulse.create()).apply("create", ParDo.of(new DoFn<byte[], String>() {

        @ProcessElement
        public void process(ProcessContext ctxt) {
            ctxt.output("zero");
            ctxt.output("one");
            ctxt.output("two");
        }
    })).setCoder(StringUtf8Coder.of());
    PCollectionView<Iterable<String>> iterableView = input.apply("createIterableSideInput", View.asIterable());
    PCollectionView<Map<String, Iterable<String>>> multimapView = input.apply(WithKeys.of("key")).apply("createMultimapSideInput", View.asMultimap());
    input.apply("readSideInput", ParDo.of(new DoFn<String, KV<String, String>>() {

        @ProcessElement
        public void processElement(ProcessContext context) {
            for (String value : context.sideInput(iterableView)) {
                context.output(KV.of(context.element(), value));
            }
            for (Map.Entry<String, Iterable<String>> entry : context.sideInput(multimapView).entrySet()) {
                for (String value : entry.getValue()) {
                    context.output(KV.of(context.element(), entry.getKey() + ":" + value));
                }
            }
        }
    }).withSideInputs(iterableView, multimapView)).setCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())).apply("gbk", GroupByKey.create());
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
    FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineProto);
    Optional<ExecutableStage> optionalStage = Iterables.tryFind(fused.getFusedStages(), (ExecutableStage stage) -> !stage.getSideInputs().isEmpty());
    checkState(optionalStage.isPresent(), "Expected a stage with side inputs.");
    ExecutableStage stage = optionalStage.get();
    ExecutableProcessBundleDescriptor descriptor = ProcessBundleDescriptors.fromExecutableStage("test_stage", stage, dataServer.getApiServiceDescriptor(), stateServer.getApiServiceDescriptor());
    BundleProcessor processor = controlClient.getProcessor(descriptor.getProcessBundleDescriptor(), descriptor.getRemoteInputDestinations(), stateDelegator);
    Map<String, Coder> remoteOutputCoders = descriptor.getRemoteOutputCoders();
    Map<String, Collection<WindowedValue<?>>> outputValues = new HashMap<>();
    Map<String, RemoteOutputReceiver<?>> outputReceivers = new HashMap<>();
    for (Entry<String, Coder> remoteOutputCoder : remoteOutputCoders.entrySet()) {
        List<WindowedValue<?>> outputContents = Collections.synchronizedList(new ArrayList<>());
        outputValues.put(remoteOutputCoder.getKey(), outputContents);
        outputReceivers.put(remoteOutputCoder.getKey(), RemoteOutputReceiver.of((Coder<WindowedValue<?>>) remoteOutputCoder.getValue(), outputContents::add));
    }
    StoringStateRequestHandler stateRequestHandler = new StoringStateRequestHandler(StateRequestHandlers.forSideInputHandlerFactory(descriptor.getSideInputSpecs(), new SideInputHandlerFactory() {

        @Override
        public <V, W extends BoundedWindow> IterableSideInputHandler<V, W> forIterableSideInput(String pTransformId, String sideInputId, Coder<V> elementCoder, Coder<W> windowCoder) {
            return new IterableSideInputHandler<V, W>() {

                @Override
                public Iterable<V> get(W window) {
                    return (Iterable) Arrays.asList("A", "B", "C");
                }

                @Override
                public Coder<V> elementCoder() {
                    return elementCoder;
                }
            };
        }

        @Override
        public <K, V, W extends BoundedWindow> MultimapSideInputHandler<K, V, W> forMultimapSideInput(String pTransformId, String sideInputId, KvCoder<K, V> elementCoder, Coder<W> windowCoder) {
            return new MultimapSideInputHandler<K, V, W>() {

                @Override
                public Iterable<K> get(W window) {
                    return (Iterable) Arrays.asList("key1", "key2");
                }

                @Override
                public Iterable<V> get(K key, W window) {
                    if ("key1".equals(key)) {
                        return (Iterable) Arrays.asList("H", "I", "J");
                    } else if ("key2".equals(key)) {
                        return (Iterable) Arrays.asList("M", "N", "O");
                    }
                    return Collections.emptyList();
                }

                @Override
                public Coder<K> keyCoder() {
                    return elementCoder.getKeyCoder();
                }

                @Override
                public Coder<V> valueCoder() {
                    return elementCoder.getValueCoder();
                }
            };
        }
    }));
    String transformId = Iterables.get(stage.getSideInputs(), 0).transform().getId();
    stateRequestHandler.addCacheToken(BeamFnApi.ProcessBundleRequest.CacheToken.newBuilder().setSideInput(BeamFnApi.ProcessBundleRequest.CacheToken.SideInput.newBuilder().setSideInputId(iterableView.getTagInternal().getId()).setTransformId(transformId).build()).setToken(ByteString.copyFromUtf8("IterableSideInputToken")).build());
    stateRequestHandler.addCacheToken(BeamFnApi.ProcessBundleRequest.CacheToken.newBuilder().setSideInput(BeamFnApi.ProcessBundleRequest.CacheToken.SideInput.newBuilder().setSideInputId(multimapView.getTagInternal().getId()).setTransformId(transformId).build()).setToken(ByteString.copyFromUtf8("MulitmapSideInputToken")).build());
    BundleProgressHandler progressHandler = BundleProgressHandler.ignored();
    try (RemoteBundle bundle = processor.newBundle(outputReceivers, stateRequestHandler, progressHandler)) {
        Iterables.getOnlyElement(bundle.getInputReceivers().values()).accept(valueInGlobalWindow("X"));
    }
    try (RemoteBundle bundle = processor.newBundle(outputReceivers, stateRequestHandler, progressHandler)) {
        Iterables.getOnlyElement(bundle.getInputReceivers().values()).accept(valueInGlobalWindow("Y"));
    }
    for (Collection<WindowedValue<?>> windowedValues : outputValues.values()) {
        assertThat(windowedValues, containsInAnyOrder(valueInGlobalWindow(KV.of("X", "A")), valueInGlobalWindow(KV.of("X", "B")), valueInGlobalWindow(KV.of("X", "C")), valueInGlobalWindow(KV.of("X", "key1:H")), valueInGlobalWindow(KV.of("X", "key1:I")), valueInGlobalWindow(KV.of("X", "key1:J")), valueInGlobalWindow(KV.of("X", "key2:M")), valueInGlobalWindow(KV.of("X", "key2:N")), valueInGlobalWindow(KV.of("X", "key2:O")), valueInGlobalWindow(KV.of("Y", "A")), valueInGlobalWindow(KV.of("Y", "B")), valueInGlobalWindow(KV.of("Y", "C")), valueInGlobalWindow(KV.of("Y", "key1:H")), valueInGlobalWindow(KV.of("Y", "key1:I")), valueInGlobalWindow(KV.of("Y", "key1:J")), valueInGlobalWindow(KV.of("Y", "key2:M")), valueInGlobalWindow(KV.of("Y", "key2:N")), valueInGlobalWindow(KV.of("Y", "key2:O"))));
    }
    // Expect the following requests for the first bundle:
    // * one to read iterable side input
    // * one to read keys from multimap side input
    // * one to read key1 iterable from multimap side input
    // * one to read key2 iterable from multimap side input
    assertEquals(4, stateRequestHandler.receivedRequests.size());
    assertEquals(stateRequestHandler.receivedRequests.get(0).getStateKey().getIterableSideInput(), BeamFnApi.StateKey.IterableSideInput.newBuilder().setSideInputId(iterableView.getTagInternal().getId()).setTransformId(transformId).build());
    assertEquals(stateRequestHandler.receivedRequests.get(1).getStateKey().getMultimapKeysSideInput(), BeamFnApi.StateKey.MultimapKeysSideInput.newBuilder().setSideInputId(multimapView.getTagInternal().getId()).setTransformId(transformId).build());
    assertEquals(stateRequestHandler.receivedRequests.get(2).getStateKey().getMultimapSideInput(), BeamFnApi.StateKey.MultimapSideInput.newBuilder().setSideInputId(multimapView.getTagInternal().getId()).setTransformId(transformId).setKey(encode("key1")).build());
    assertEquals(stateRequestHandler.receivedRequests.get(3).getStateKey().getMultimapSideInput(), BeamFnApi.StateKey.MultimapSideInput.newBuilder().setSideInputId(multimapView.getTagInternal().getId()).setTransformId(transformId).setKey(encode("key2")).build());
}
Also used : IsEmptyIterable(org.hamcrest.collection.IsEmptyIterable) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Entry(java.util.Map.Entry) BundleProcessor(org.apache.beam.runners.fnexecution.control.SdkHarnessClient.BundleProcessor) WindowedValue(org.apache.beam.sdk.util.WindowedValue) KV(org.apache.beam.sdk.values.KV) SideInputHandlerFactory(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.SideInputHandlerFactory) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) ExecutableProcessBundleDescriptor(org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors.ExecutableProcessBundleDescriptor) KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) BigEndianLongCoder(org.apache.beam.sdk.coders.BigEndianLongCoder) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) KvCoder(org.apache.beam.sdk.coders.KvCoder) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) IterableSideInputHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.IterableSideInputHandler) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) MultimapSideInputHandler(org.apache.beam.runners.fnexecution.state.StateRequestHandlers.MultimapSideInputHandler) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) Test(org.junit.Test)

Example 34 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

the class ParDoBoundMultiTranslator method doTranslate.

// static for serializing anonymous functions
private static <InT, OutT> void doTranslate(ParDo.MultiOutput<InT, OutT> transform, TransformHierarchy.Node node, TranslationContext ctx) {
    final PCollection<? extends InT> input = ctx.getInput(transform);
    final Map<TupleTag<?>, Coder<?>> outputCoders = ctx.getCurrentTransform().getOutputs().entrySet().stream().filter(e -> e.getValue() instanceof PCollection).collect(Collectors.toMap(e -> e.getKey(), e -> ((PCollection<?>) e.getValue()).getCoder()));
    final Coder<?> keyCoder = StateUtils.isStateful(transform.getFn()) ? ((KvCoder<?, ?>) input.getCoder()).getKeyCoder() : null;
    if (DoFnSignatures.isSplittable(transform.getFn())) {
        throw new UnsupportedOperationException("Splittable DoFn is not currently supported");
    }
    if (DoFnSignatures.requiresTimeSortedInput(transform.getFn())) {
        throw new UnsupportedOperationException("@RequiresTimeSortedInput annotation is not currently supported");
    }
    final MessageStream<OpMessage<InT>> inputStream = ctx.getMessageStream(input);
    final List<MessageStream<OpMessage<InT>>> sideInputStreams = transform.getSideInputs().values().stream().map(ctx::<InT>getViewStream).collect(Collectors.toList());
    final ArrayList<Map.Entry<TupleTag<?>, PCollection<?>>> outputs = new ArrayList<>(node.getOutputs().entrySet());
    final Map<TupleTag<?>, Integer> tagToIndexMap = new HashMap<>();
    final Map<Integer, PCollection<?>> indexToPCollectionMap = new HashMap<>();
    for (int index = 0; index < outputs.size(); ++index) {
        final Map.Entry<TupleTag<?>, PCollection<?>> taggedOutput = outputs.get(index);
        tagToIndexMap.put(taggedOutput.getKey(), index);
        if (!(taggedOutput.getValue() instanceof PCollection)) {
            throw new IllegalArgumentException("Expected side output to be PCollection, but was: " + taggedOutput.getValue());
        }
        final PCollection<?> sideOutputCollection = taggedOutput.getValue();
        indexToPCollectionMap.put(index, sideOutputCollection);
    }
    final HashMap<String, PCollectionView<?>> idToPValueMap = new HashMap<>();
    for (PCollectionView<?> view : transform.getSideInputs().values()) {
        idToPValueMap.put(ctx.getViewId(view), view);
    }
    DoFnSchemaInformation doFnSchemaInformation;
    doFnSchemaInformation = ParDoTranslation.getSchemaInformation(ctx.getCurrentTransform());
    Map<String, PCollectionView<?>> sideInputMapping = ParDoTranslation.getSideInputMapping(ctx.getCurrentTransform());
    final DoFnOp<InT, OutT, RawUnionValue> op = new DoFnOp<>(transform.getMainOutputTag(), transform.getFn(), keyCoder, (Coder<InT>) input.getCoder(), null, outputCoders, transform.getSideInputs().values(), transform.getAdditionalOutputTags().getAll(), input.getWindowingStrategy(), idToPValueMap, new DoFnOp.MultiOutputManagerFactory(tagToIndexMap), ctx.getTransformFullName(), ctx.getTransformId(), input.isBounded(), false, null, null, Collections.emptyMap(), doFnSchemaInformation, sideInputMapping);
    final MessageStream<OpMessage<InT>> mergedStreams;
    if (sideInputStreams.isEmpty()) {
        mergedStreams = inputStream;
    } else {
        MessageStream<OpMessage<InT>> mergedSideInputStreams = MessageStream.mergeAll(sideInputStreams).flatMap(new SideInputWatermarkFn());
        mergedStreams = inputStream.merge(Collections.singletonList(mergedSideInputStreams));
    }
    final MessageStream<OpMessage<RawUnionValue>> taggedOutputStream = mergedStreams.flatMapAsync(OpAdapter.adapt(op));
    for (int outputIndex : tagToIndexMap.values()) {
        @SuppressWarnings("unchecked") final MessageStream<OpMessage<OutT>> outputStream = taggedOutputStream.filter(message -> message.getType() != OpMessage.Type.ELEMENT || message.getElement().getValue().getUnionTag() == outputIndex).flatMapAsync(OpAdapter.adapt(new RawUnionValueToValue()));
        ctx.registerMessageStream(indexToPCollectionMap.get(outputIndex), outputStream);
    }
}
Also used : WindowedValue(org.apache.beam.sdk.util.WindowedValue) PCollectionViews(org.apache.beam.sdk.values.PCollectionViews) OpMessage(org.apache.beam.runners.samza.runtime.OpMessage) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) RunnerPCollectionView(org.apache.beam.runners.core.construction.RunnerPCollectionView) WatermarkFunction(org.apache.samza.operators.functions.WatermarkFunction) DoFnSignatures(org.apache.beam.sdk.transforms.reflect.DoFnSignatures) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Iterators(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterators) StateUtils(org.apache.beam.runners.samza.util.StateUtils) KvCoder(org.apache.beam.sdk.coders.KvCoder) Collection(java.util.Collection) ServiceLoader(java.util.ServiceLoader) Collectors(java.util.stream.Collectors) TransformHierarchy(org.apache.beam.sdk.runners.TransformHierarchy) FlatMapFunction(org.apache.samza.operators.functions.FlatMapFunction) List(java.util.List) ParDo(org.apache.beam.sdk.transforms.ParDo) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) OpEmitter(org.apache.beam.runners.samza.runtime.OpEmitter) PipelineTranslatorUtils.instantiateCoder(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.instantiateCoder) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) SamzaPipelineTranslatorUtils(org.apache.beam.runners.samza.util.SamzaPipelineTranslatorUtils) KV(org.apache.beam.sdk.values.KV) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) OpAdapter(org.apache.beam.runners.samza.runtime.OpAdapter) DoFnOp(org.apache.beam.runners.samza.runtime.DoFnOp) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) ViewFn(org.apache.beam.sdk.transforms.ViewFn) TupleTag(org.apache.beam.sdk.values.TupleTag) SamzaDoFnInvokerRegistrar(org.apache.beam.runners.samza.runtime.SamzaDoFnInvokerRegistrar) WindowUtils(org.apache.beam.runners.samza.util.WindowUtils) SideInputId(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.SideInputId) ParDoTranslation(org.apache.beam.runners.core.construction.ParDoTranslation) MessageStream(org.apache.samza.operators.MessageStream) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) QueryablePipeline(org.apache.beam.runners.core.construction.graph.QueryablePipeline) Op(org.apache.beam.runners.samza.runtime.Op) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature) Iterator(java.util.Iterator) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) IOException(java.io.IOException) PCollection(org.apache.beam.sdk.values.PCollection) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) PipelineNode(org.apache.beam.runners.core.construction.graph.PipelineNode) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) Collections(java.util.Collections) OpMessage(org.apache.beam.runners.samza.runtime.OpMessage) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TupleTag(org.apache.beam.sdk.values.TupleTag) DoFnOp(org.apache.beam.runners.samza.runtime.DoFnOp) MessageStream(org.apache.samza.operators.MessageStream) KvCoder(org.apache.beam.sdk.coders.KvCoder) PipelineTranslatorUtils.instantiateCoder(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.instantiateCoder) Coder(org.apache.beam.sdk.coders.Coder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) PCollection(org.apache.beam.sdk.values.PCollection) RunnerPCollectionView(org.apache.beam.runners.core.construction.RunnerPCollectionView) PCollectionView(org.apache.beam.sdk.values.PCollectionView) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) Map(java.util.Map) HashMap(java.util.HashMap)

Example 35 with KvCoder

use of org.apache.beam.sdk.coders.KvCoder in project beam by apache.

the class ParDoBoundMultiTranslator method doTranslatePortable.

// static for serializing anonymous functions
private static <InT, OutT> void doTranslatePortable(PipelineNode.PTransformNode transform, QueryablePipeline pipeline, PortableTranslationContext ctx) {
    Map<String, String> outputs = transform.getTransform().getOutputsMap();
    final RunnerApi.ExecutableStagePayload stagePayload;
    try {
        stagePayload = RunnerApi.ExecutableStagePayload.parseFrom(transform.getTransform().getSpec().getPayload());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String inputId = stagePayload.getInput();
    final MessageStream<OpMessage<InT>> inputStream = ctx.getMessageStreamById(inputId);
    // Analyze side inputs
    final List<MessageStream<OpMessage<Iterable<?>>>> sideInputStreams = new ArrayList<>();
    final Map<SideInputId, PCollectionView<?>> sideInputMapping = new HashMap<>();
    final Map<String, PCollectionView<?>> idToViewMapping = new HashMap<>();
    final RunnerApi.Components components = stagePayload.getComponents();
    for (SideInputId sideInputId : stagePayload.getSideInputsList()) {
        final String sideInputCollectionId = components.getTransformsOrThrow(sideInputId.getTransformId()).getInputsOrThrow(sideInputId.getLocalName());
        final WindowingStrategy<?, BoundedWindow> windowingStrategy = WindowUtils.getWindowStrategy(sideInputCollectionId, components);
        final WindowedValue.WindowedValueCoder<?> coder = (WindowedValue.WindowedValueCoder) instantiateCoder(sideInputCollectionId, components);
        // Create a runner-side view
        final PCollectionView<?> view = createPCollectionView(sideInputId, coder, windowingStrategy);
        // Use GBK to aggregate the side inputs and then broadcast it out
        final MessageStream<OpMessage<Iterable<?>>> broadcastSideInput = groupAndBroadcastSideInput(sideInputId, sideInputCollectionId, components.getPcollectionsOrThrow(sideInputCollectionId), (WindowingStrategy) windowingStrategy, coder, ctx);
        sideInputStreams.add(broadcastSideInput);
        sideInputMapping.put(sideInputId, view);
        idToViewMapping.put(getSideInputUniqueId(sideInputId), view);
    }
    final Map<TupleTag<?>, Integer> tagToIndexMap = new HashMap<>();
    final Map<Integer, String> indexToIdMap = new HashMap<>();
    final Map<String, TupleTag<?>> idToTupleTagMap = new HashMap<>();
    // first output as the main output
    final TupleTag<OutT> mainOutputTag = outputs.isEmpty() ? null : new TupleTag(outputs.keySet().iterator().next());
    AtomicInteger index = new AtomicInteger(0);
    outputs.keySet().iterator().forEachRemaining(outputName -> {
        TupleTag<?> tupleTag = new TupleTag<>(outputName);
        tagToIndexMap.put(tupleTag, index.get());
        String collectionId = outputs.get(outputName);
        indexToIdMap.put(index.get(), collectionId);
        idToTupleTagMap.put(collectionId, tupleTag);
        index.incrementAndGet();
    });
    WindowedValue.WindowedValueCoder<InT> windowedInputCoder = WindowUtils.instantiateWindowedCoder(inputId, pipeline.getComponents());
    // TODO: support schema and side inputs for portable runner
    // Note: transform.getTransform() is an ExecutableStage, not ParDo, so we need to extract
    // these info from its components.
    final DoFnSchemaInformation doFnSchemaInformation = null;
    final RunnerApi.PCollection input = pipeline.getComponents().getPcollectionsOrThrow(inputId);
    final PCollection.IsBounded isBounded = SamzaPipelineTranslatorUtils.isBounded(input);
    final Coder<?> keyCoder = StateUtils.isStateful(stagePayload) ? ((KvCoder) ((WindowedValue.FullWindowedValueCoder) windowedInputCoder).getValueCoder()).getKeyCoder() : null;
    final DoFnOp<InT, OutT, RawUnionValue> op = new DoFnOp<>(mainOutputTag, new NoOpDoFn<>(), keyCoder, // input coder not in use
    windowedInputCoder.getValueCoder(), windowedInputCoder, // output coders not in use
    Collections.emptyMap(), new ArrayList<>(sideInputMapping.values()), // used by java runner only
    new ArrayList<>(idToTupleTagMap.values()), WindowUtils.getWindowStrategy(inputId, stagePayload.getComponents()), idToViewMapping, new DoFnOp.MultiOutputManagerFactory(tagToIndexMap), ctx.getTransformFullName(), ctx.getTransformId(), isBounded, true, stagePayload, ctx.getJobInfo(), idToTupleTagMap, doFnSchemaInformation, sideInputMapping);
    final MessageStream<OpMessage<InT>> mergedStreams;
    if (sideInputStreams.isEmpty()) {
        mergedStreams = inputStream;
    } else {
        MessageStream<OpMessage<InT>> mergedSideInputStreams = MessageStream.mergeAll(sideInputStreams).flatMap(new SideInputWatermarkFn());
        mergedStreams = inputStream.merge(Collections.singletonList(mergedSideInputStreams));
    }
    final MessageStream<OpMessage<RawUnionValue>> taggedOutputStream = mergedStreams.flatMapAsync(OpAdapter.adapt(op));
    for (int outputIndex : tagToIndexMap.values()) {
        @SuppressWarnings("unchecked") final MessageStream<OpMessage<OutT>> outputStream = taggedOutputStream.filter(message -> message.getType() != OpMessage.Type.ELEMENT || message.getElement().getValue().getUnionTag() == outputIndex).flatMapAsync(OpAdapter.adapt(new RawUnionValueToValue()));
        ctx.registerMessageStream(indexToIdMap.get(outputIndex), outputStream);
    }
}
Also used : WindowedValue(org.apache.beam.sdk.util.WindowedValue) PCollectionViews(org.apache.beam.sdk.values.PCollectionViews) OpMessage(org.apache.beam.runners.samza.runtime.OpMessage) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) RunnerPCollectionView(org.apache.beam.runners.core.construction.RunnerPCollectionView) WatermarkFunction(org.apache.samza.operators.functions.WatermarkFunction) DoFnSignatures(org.apache.beam.sdk.transforms.reflect.DoFnSignatures) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) Iterators(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterators) StateUtils(org.apache.beam.runners.samza.util.StateUtils) KvCoder(org.apache.beam.sdk.coders.KvCoder) Collection(java.util.Collection) ServiceLoader(java.util.ServiceLoader) Collectors(java.util.stream.Collectors) TransformHierarchy(org.apache.beam.sdk.runners.TransformHierarchy) FlatMapFunction(org.apache.samza.operators.functions.FlatMapFunction) List(java.util.List) ParDo(org.apache.beam.sdk.transforms.ParDo) TypeDescriptors(org.apache.beam.sdk.values.TypeDescriptors) OpEmitter(org.apache.beam.runners.samza.runtime.OpEmitter) PipelineTranslatorUtils.instantiateCoder(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.instantiateCoder) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) SamzaPipelineTranslatorUtils(org.apache.beam.runners.samza.util.SamzaPipelineTranslatorUtils) KV(org.apache.beam.sdk.values.KV) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) OpAdapter(org.apache.beam.runners.samza.runtime.OpAdapter) DoFnOp(org.apache.beam.runners.samza.runtime.DoFnOp) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) ViewFn(org.apache.beam.sdk.transforms.ViewFn) TupleTag(org.apache.beam.sdk.values.TupleTag) SamzaDoFnInvokerRegistrar(org.apache.beam.runners.samza.runtime.SamzaDoFnInvokerRegistrar) WindowUtils(org.apache.beam.runners.samza.util.WindowUtils) SideInputId(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.SideInputId) ParDoTranslation(org.apache.beam.runners.core.construction.ParDoTranslation) MessageStream(org.apache.samza.operators.MessageStream) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) QueryablePipeline(org.apache.beam.runners.core.construction.graph.QueryablePipeline) Op(org.apache.beam.runners.samza.runtime.Op) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature) Iterator(java.util.Iterator) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) IOException(java.io.IOException) PCollection(org.apache.beam.sdk.values.PCollection) SamzaPipelineOptions(org.apache.beam.runners.samza.SamzaPipelineOptions) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) PipelineNode(org.apache.beam.runners.core.construction.graph.PipelineNode) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) Collections(java.util.Collections) OpMessage(org.apache.beam.runners.samza.runtime.OpMessage) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TupleTag(org.apache.beam.sdk.values.TupleTag) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) WindowedValue(org.apache.beam.sdk.util.WindowedValue) MessageStream(org.apache.samza.operators.MessageStream) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) SideInputId(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.SideInputId) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) RunnerPCollectionView(org.apache.beam.runners.core.construction.RunnerPCollectionView) PCollectionView(org.apache.beam.sdk.values.PCollectionView) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DoFnOp(org.apache.beam.runners.samza.runtime.DoFnOp) IOException(java.io.IOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) PCollection(org.apache.beam.sdk.values.PCollection)

Aggregations

KvCoder (org.apache.beam.sdk.coders.KvCoder)44 Coder (org.apache.beam.sdk.coders.Coder)26 WindowedValue (org.apache.beam.sdk.util.WindowedValue)25 KV (org.apache.beam.sdk.values.KV)21 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)20 Map (java.util.Map)17 List (java.util.List)16 ArrayList (java.util.ArrayList)15 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)15 IOException (java.io.IOException)14 HashMap (java.util.HashMap)14 WindowingStrategy (org.apache.beam.sdk.values.WindowingStrategy)13 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)11 IterableCoder (org.apache.beam.sdk.coders.IterableCoder)10 VoidCoder (org.apache.beam.sdk.coders.VoidCoder)10 PCollectionView (org.apache.beam.sdk.values.PCollectionView)10 Test (org.junit.Test)10 WindowedValueCoder (org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder)8 ViewFn (org.apache.beam.sdk.transforms.ViewFn)7 PCollection (org.apache.beam.sdk.values.PCollection)7