Search in sources :

Example 1 with ParDoPayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload in project beam by apache.

the class ProcessBundleDescriptorsTest method testLengthPrefixingOfKeyCoderInStatefulExecutableStage.

/**
 * Tests that a stateful stage will wrap the key coder of a stateful transform in a
 * LengthPrefixCoder.
 */
@Test
public void testLengthPrefixingOfKeyCoderInStatefulExecutableStage() throws Exception {
    // Add another stateful stage with a non-standard key coder
    Pipeline p = Pipeline.create();
    Coder<Void> keycoder = VoidCoder.of();
    assertThat(ModelCoderRegistrar.isKnownCoder(keycoder), is(false));
    p.apply("impulse", Impulse.create()).apply("create", ParDo.of(new DoFn<byte[], KV<Void, String>>() {

        @ProcessElement
        public void process(ProcessContext ctxt) {
        }
    })).setCoder(KvCoder.of(keycoder, StringUtf8Coder.of())).apply("userState", ParDo.of(new DoFn<KV<Void, String>, KV<Void, String>>() {

        @StateId("stateId")
        private final StateSpec<BagState<String>> bufferState = StateSpecs.bag(StringUtf8Coder.of());

        @TimerId("timerId")
        private final TimerSpec timerSpec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @ProcessElement
        public void processElement(@Element KV<Void, String> element, @StateId("stateId") BagState<String> state, @TimerId("timerId") Timer timer, OutputReceiver<KV<Void, String>> r) {
        }

        @OnTimer("timerId")
        public void onTimer() {
        }
    })).apply("gbk", GroupByKey.create());
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
    FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineProto);
    Optional<ExecutableStage> optionalStage = Iterables.tryFind(fused.getFusedStages(), (ExecutableStage stage) -> stage.getUserStates().stream().anyMatch(spec -> spec.localName().equals("stateId")));
    checkState(optionalStage.isPresent(), "Expected a stage with user state.");
    ExecutableStage stage = optionalStage.get();
    PipelineNode.PCollectionNode inputPCollection = stage.getInputPCollection();
    // Ensure original key coder is not a LengthPrefixCoder
    Map<String, RunnerApi.Coder> stageCoderMap = stage.getComponents().getCodersMap();
    RunnerApi.Coder originalMainInputCoder = stageCoderMap.get(inputPCollection.getPCollection().getCoderId());
    String originalKeyCoderId = ModelCoders.getKvCoderComponents(originalMainInputCoder).keyCoderId();
    RunnerApi.Coder originalKeyCoder = stageCoderMap.get(originalKeyCoderId);
    assertThat(originalKeyCoder.getSpec().getUrn(), is(CoderTranslation.JAVA_SERIALIZED_CODER_URN));
    // Now create ProcessBundleDescriptor and check for the LengthPrefixCoder around the key coder
    BeamFnApi.ProcessBundleDescriptor pbd = ProcessBundleDescriptors.fromExecutableStage("test_stage", stage, Endpoints.ApiServiceDescriptor.getDefaultInstance()).getProcessBundleDescriptor();
    Map<String, RunnerApi.Coder> pbsCoderMap = pbd.getCodersMap();
    RunnerApi.Coder pbsMainInputCoder = pbsCoderMap.get(pbd.getPcollectionsOrThrow(inputPCollection.getId()).getCoderId());
    String keyCoderId = ModelCoders.getKvCoderComponents(pbsMainInputCoder).keyCoderId();
    RunnerApi.Coder keyCoder = pbsCoderMap.get(keyCoderId);
    ensureLengthPrefixed(keyCoder, originalKeyCoder, pbsCoderMap);
    TimerReference timerRef = Iterables.getOnlyElement(stage.getTimers());
    String timerTransformId = timerRef.transform().getId();
    RunnerApi.ParDoPayload parDoPayload = RunnerApi.ParDoPayload.parseFrom(pbd.getTransformsOrThrow(timerTransformId).getSpec().getPayload());
    RunnerApi.TimerFamilySpec timerSpec = parDoPayload.getTimerFamilySpecsOrThrow(timerRef.localName());
    RunnerApi.Coder timerCoder = pbsCoderMap.get(timerSpec.getTimerFamilyCoderId());
    String timerKeyCoderId = timerCoder.getComponentCoderIds(0);
    RunnerApi.Coder timerKeyCoder = pbsCoderMap.get(timerKeyCoderId);
    ensureLengthPrefixed(timerKeyCoder, originalKeyCoder, pbsCoderMap);
}
Also used : CoreMatchers.is(org.hamcrest.CoreMatchers.is) Endpoints(org.apache.beam.model.pipeline.v1.Endpoints) StateSpec(org.apache.beam.sdk.state.StateSpec) KV(org.apache.beam.sdk.values.KV) CoderTranslation(org.apache.beam.runners.core.construction.CoderTranslation) TimerSpecs(org.apache.beam.sdk.state.TimerSpecs) Coder(org.apache.beam.sdk.coders.Coder) Impulse(org.apache.beam.sdk.transforms.Impulse) GreedyPipelineFuser(org.apache.beam.runners.core.construction.graph.GreedyPipelineFuser) PipelineTranslation(org.apache.beam.runners.core.construction.PipelineTranslation) Optional(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Optional) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) TimerSpec(org.apache.beam.sdk.state.TimerSpec) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) TimerReference(org.apache.beam.runners.core.construction.graph.TimerReference) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) ModelCoderRegistrar(org.apache.beam.runners.core.construction.ModelCoderRegistrar) Pipeline(org.apache.beam.sdk.Pipeline) ProcessContext(org.apache.beam.sdk.transforms.DoFn.ProcessContext) RestrictionTracker(org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) DoFn(org.apache.beam.sdk.transforms.DoFn) KvCoder(org.apache.beam.sdk.coders.KvCoder) GroupByKey(org.apache.beam.sdk.transforms.GroupByKey) PTransformTranslation(org.apache.beam.runners.core.construction.PTransformTranslation) ProcessElement(org.apache.beam.sdk.transforms.DoFn.ProcessElement) Test(org.junit.Test) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) ModelCoders(org.apache.beam.runners.core.construction.ModelCoders) Serializable(java.io.Serializable) SplittableParDoExpander(org.apache.beam.runners.core.construction.graph.SplittableParDoExpander) BagState(org.apache.beam.sdk.state.BagState) StateSpecs(org.apache.beam.sdk.state.StateSpecs) ParDo(org.apache.beam.sdk.transforms.ParDo) ProtoOverrides(org.apache.beam.runners.core.construction.graph.ProtoOverrides) Timer(org.apache.beam.sdk.state.Timer) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) PipelineNode(org.apache.beam.runners.core.construction.graph.PipelineNode) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) TimeDomain(org.apache.beam.sdk.state.TimeDomain) TimerReference(org.apache.beam.runners.core.construction.graph.TimerReference) ProcessContext(org.apache.beam.sdk.transforms.DoFn.ProcessContext) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) BagState(org.apache.beam.sdk.state.BagState) TimerSpec(org.apache.beam.sdk.state.TimerSpec) Coder(org.apache.beam.sdk.coders.Coder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) KvCoder(org.apache.beam.sdk.coders.KvCoder) VoidCoder(org.apache.beam.sdk.coders.VoidCoder) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) KV(org.apache.beam.sdk.values.KV) PipelineNode(org.apache.beam.runners.core.construction.graph.PipelineNode) Pipeline(org.apache.beam.sdk.Pipeline) FusedPipeline(org.apache.beam.runners.core.construction.graph.FusedPipeline) Timer(org.apache.beam.sdk.state.Timer) ProcessElement(org.apache.beam.sdk.transforms.DoFn.ProcessElement) Test(org.junit.Test)

Example 2 with ParDoPayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload in project beam by apache.

the class ProcessBundleHandlerTest method testOrderOfSetupTeardownCalls.

@Test
public void testOrderOfSetupTeardownCalls() throws Exception {
    DoFnWithExecutionInformation doFnWithExecutionInformation = DoFnWithExecutionInformation.of(new TestDoFn(), TestDoFn.mainOutput, Collections.emptyMap(), DoFnSchemaInformation.create());
    RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder().setUrn(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN).setPayload(ByteString.copyFrom(SerializableUtils.serializeToByteArray(doFnWithExecutionInformation))).build();
    RunnerApi.ParDoPayload parDoPayload = RunnerApi.ParDoPayload.newBuilder().setDoFn(functionSpec).build();
    BeamFnApi.ProcessBundleDescriptor processBundleDescriptor = BeamFnApi.ProcessBundleDescriptor.newBuilder().putTransforms("2L", PTransform.newBuilder().setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(DATA_INPUT_URN).build()).putOutputs("2L-output", "2L-output-pc").build()).putTransforms("3L", PTransform.newBuilder().setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(parDoPayload.toByteString())).putInputs("3L-input", "2L-output-pc").build()).putPcollections("2L-output-pc", PCollection.newBuilder().setWindowingStrategyId("window-strategy").setCoderId("2L-output-coder").setIsBounded(IsBounded.Enum.BOUNDED).build()).putWindowingStrategies("window-strategy", WindowingStrategy.newBuilder().setWindowCoderId("window-strategy-coder").setWindowFn(RunnerApi.FunctionSpec.newBuilder().setUrn("beam:window_fn:global_windows:v1")).setOutputTime(RunnerApi.OutputTime.Enum.END_OF_WINDOW).setAccumulationMode(RunnerApi.AccumulationMode.Enum.ACCUMULATING).setTrigger(RunnerApi.Trigger.newBuilder().setAlways(RunnerApi.Trigger.Always.getDefaultInstance())).setClosingBehavior(RunnerApi.ClosingBehavior.Enum.EMIT_ALWAYS).setOnTimeBehavior(RunnerApi.OnTimeBehavior.Enum.FIRE_ALWAYS).build()).putCoders("2L-output-coder", CoderTranslation.toProto(StringUtf8Coder.of()).getCoder()).putCoders("window-strategy-coder", Coder.newBuilder().setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(ModelCoders.GLOBAL_WINDOW_CODER_URN).build()).build()).build();
    Map<String, BeamFnApi.ProcessBundleDescriptor> fnApiRegistry = ImmutableMap.of("1L", processBundleDescriptor);
    Map<String, PTransformRunnerFactory> urnToPTransformRunnerFactoryMap = Maps.newHashMap(REGISTERED_RUNNER_FACTORIES);
    urnToPTransformRunnerFactoryMap.put(DATA_INPUT_URN, (context) -> null);
    ProcessBundleHandler handler = new ProcessBundleHandler(PipelineOptionsFactory.create(), Collections.emptySet(), fnApiRegistry::get, beamFnDataClient, null, /* beamFnStateClient */
    null, /* finalizeBundleHandler */
    new ShortIdMap(), urnToPTransformRunnerFactoryMap, Caches.noop(), new BundleProcessorCache());
    handler.processBundle(BeamFnApi.InstructionRequest.newBuilder().setInstructionId("998L").setProcessBundle(BeamFnApi.ProcessBundleRequest.newBuilder().setProcessBundleDescriptorId("1L")).build());
    handler.processBundle(BeamFnApi.InstructionRequest.newBuilder().setInstructionId("999L").setProcessBundle(BeamFnApi.ProcessBundleRequest.newBuilder().setProcessBundleDescriptorId("1L")).build());
    handler.shutdown();
    // setup and teardown should occur only once when processing multiple bundles for the same
    // descriptor
    assertThat(TestDoFn.orderOfOperations, contains("setUp", "startBundle", "finishBundle", "startBundle", "finishBundle", "tearDown"));
}
Also used : ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) BundleProcessorCache(org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessorCache) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) DoFnWithExecutionInformation(org.apache.beam.sdk.util.DoFnWithExecutionInformation) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ShortIdMap(org.apache.beam.runners.core.metrics.ShortIdMap) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PTransformRunnerFactory(org.apache.beam.fn.harness.PTransformRunnerFactory) Test(org.junit.Test)

Example 3 with ParDoPayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload in project beam by apache.

the class ParDoTranslation method translateParDo.

public static ParDoPayload translateParDo(AppliedPTransform<?, ?, ParDo.MultiOutput<?, ?>> appliedPTransform, SdkComponents components) throws IOException {
    final ParDo.MultiOutput<?, ?> parDo = appliedPTransform.getTransform();
    final Pipeline pipeline = appliedPTransform.getPipeline();
    final DoFn<?, ?> doFn = parDo.getFn();
    // Get main input.
    Set<String> allInputs = appliedPTransform.getInputs().keySet().stream().map(TupleTag::getId).collect(Collectors.toSet());
    Set<String> sideInputs = parDo.getSideInputs().values().stream().map(s -> s.getTagInternal().getId()).collect(Collectors.toSet());
    String mainInputName = Iterables.getOnlyElement(Sets.difference(allInputs, sideInputs));
    PCollection<?> mainInput = (PCollection<?>) appliedPTransform.getInputs().get(new TupleTag<>(mainInputName));
    final DoFnSchemaInformation doFnSchemaInformation = ParDo.getDoFnSchemaInformation(doFn, mainInput);
    return translateParDo((ParDo.MultiOutput) parDo, mainInput, doFnSchemaInformation, pipeline, components);
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) StateDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.StateDeclaration) DoFnSignatures.getTimerSpecOrThrow(org.apache.beam.sdk.transforms.reflect.DoFnSignatures.getTimerSpecOrThrow) Parameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) DoFnSignatures(org.apache.beam.sdk.transforms.reflect.DoFnSignatures) Sets(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) DoFnInvoker(org.apache.beam.sdk.transforms.reflect.DoFnInvoker) SPLITTABLE_PROCESS_ELEMENTS_URN(org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_PROCESS_ELEMENTS_URN) SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN(org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN) KvCoder(org.apache.beam.sdk.coders.KvCoder) PAR_DO_TRANSFORM_URN(org.apache.beam.runners.core.construction.PTransformTranslation.PAR_DO_TRANSFORM_URN) Set(java.util.Set) DoFnWithExecutionInformation(org.apache.beam.sdk.util.DoFnWithExecutionInformation) StandardUserStateTypes(org.apache.beam.model.pipeline.v1.RunnerApi.StandardUserStateTypes) Collectors(java.util.stream.Collectors) SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN(org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN) TransformTranslator(org.apache.beam.runners.core.construction.PTransformTranslation.TransformTranslator) List(java.util.List) StandardRequirements(org.apache.beam.model.pipeline.v1.RunnerApi.StandardRequirements) ParDo(org.apache.beam.sdk.transforms.ParDo) SerializableUtils(org.apache.beam.sdk.util.SerializableUtils) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting) AutoValue(com.google.auto.value.AutoValue) DoFnInvokers(org.apache.beam.sdk.transforms.reflect.DoFnInvokers) DoFnSignatures.getStateSpecOrThrow(org.apache.beam.sdk.transforms.reflect.DoFnSignatures.getStateSpecOrThrow) SideInput(org.apache.beam.model.pipeline.v1.RunnerApi.SideInput) Preconditions.checkNotNull(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull) Combine(org.apache.beam.sdk.transforms.Combine) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) TupleTagList(org.apache.beam.sdk.values.TupleTagList) ArrayList(java.util.ArrayList) PTransform(org.apache.beam.sdk.transforms.PTransform) SPLITTABLE_PAIR_WITH_RESTRICTION_URN(org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_PAIR_WITH_RESTRICTION_URN) WindowMappingFn(org.apache.beam.sdk.transforms.windowing.WindowMappingFn) ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) MultiOutput(org.apache.beam.sdk.transforms.ParDo.MultiOutput) TimerSpec(org.apache.beam.sdk.state.TimerSpec) ViewFn(org.apache.beam.sdk.transforms.ViewFn) TupleTag(org.apache.beam.sdk.values.TupleTag) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Pipeline(org.apache.beam.sdk.Pipeline) Nullable(javax.annotation.Nullable) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature) IOException(java.io.IOException) PCollection(org.apache.beam.sdk.values.PCollection) TransformPayloadTranslator(org.apache.beam.runners.core.construction.PTransformTranslation.TransformPayloadTranslator) StateSpecs(org.apache.beam.sdk.state.StateSpecs) TimerDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.TimerDeclaration) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) SPLITTABLE_TRUNCATE_SIZED_RESTRICTION_URN(org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_TRUNCATE_SIZED_RESTRICTION_URN) Collections(java.util.Collections) TimeDomain(org.apache.beam.sdk.state.TimeDomain) BeamUrns.getUrn(org.apache.beam.runners.core.construction.BeamUrns.getUrn) PCollection(org.apache.beam.sdk.values.PCollection) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) ParDo(org.apache.beam.sdk.transforms.ParDo) TupleTag(org.apache.beam.sdk.values.TupleTag) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Pipeline(org.apache.beam.sdk.Pipeline)

Example 4 with ParDoPayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload in project beam by apache.

the class ParDoTranslation method translateParDo.

/**
 * Translate a ParDo.
 */
public static <InputT> ParDoPayload translateParDo(ParDo.MultiOutput<InputT, ?> parDo, PCollection<InputT> mainInput, DoFnSchemaInformation doFnSchemaInformation, Pipeline pipeline, SdkComponents components) throws IOException {
    final DoFn<?, ?> doFn = parDo.getFn();
    final DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
    final String restrictionCoderId;
    if (signature.processElement().isSplittable()) {
        DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(doFn);
        final Coder<?> restrictionAndWatermarkStateCoder = KvCoder.of(doFnInvoker.invokeGetRestrictionCoder(pipeline.getCoderRegistry()), doFnInvoker.invokeGetWatermarkEstimatorStateCoder(pipeline.getCoderRegistry()));
        restrictionCoderId = components.registerCoder(restrictionAndWatermarkStateCoder);
    } else {
        restrictionCoderId = "";
    }
    Coder<BoundedWindow> windowCoder = (Coder<BoundedWindow>) mainInput.getWindowingStrategy().getWindowFn().windowCoder();
    Coder<?> keyCoder;
    if (signature.usesState() || signature.usesTimers()) {
        checkArgument(mainInput.getCoder() instanceof KvCoder, "DoFn's that use state or timers must have an input PCollection with a KvCoder but received %s", mainInput.getCoder());
        keyCoder = ((KvCoder) mainInput.getCoder()).getKeyCoder();
    } else {
        keyCoder = null;
    }
    return payloadForParDoLike(new ParDoLike() {

        @Override
        public FunctionSpec translateDoFn(SdkComponents newComponents) {
            return ParDoTranslation.translateDoFn(parDo.getFn(), parDo.getMainOutputTag(), parDo.getSideInputs(), doFnSchemaInformation, newComponents);
        }

        @Override
        public Map<String, SideInput> translateSideInputs(SdkComponents components) {
            Map<String, SideInput> sideInputs = new HashMap<>();
            for (PCollectionView<?> sideInput : parDo.getSideInputs().values()) {
                sideInputs.put(sideInput.getTagInternal().getId(), translateView(sideInput, components));
            }
            return sideInputs;
        }

        @Override
        public Map<String, RunnerApi.StateSpec> translateStateSpecs(SdkComponents components) throws IOException {
            Map<String, RunnerApi.StateSpec> stateSpecs = new HashMap<>();
            for (Map.Entry<String, StateDeclaration> state : signature.stateDeclarations().entrySet()) {
                RunnerApi.StateSpec spec = translateStateSpec(getStateSpecOrThrow(state.getValue(), doFn), components);
                stateSpecs.put(state.getKey(), spec);
            }
            return stateSpecs;
        }

        @Override
        public ParDoLikeTimerFamilySpecs translateTimerFamilySpecs(SdkComponents newComponents) {
            Map<String, RunnerApi.TimerFamilySpec> timerFamilySpecs = new HashMap<>();
            for (Map.Entry<String, TimerDeclaration> timer : signature.timerDeclarations().entrySet()) {
                RunnerApi.TimerFamilySpec spec = translateTimerFamilySpec(getTimerSpecOrThrow(timer.getValue(), doFn), newComponents, keyCoder, windowCoder);
                timerFamilySpecs.put(timer.getKey(), spec);
            }
            for (Map.Entry<String, DoFnSignature.TimerFamilyDeclaration> timerFamily : signature.timerFamilyDeclarations().entrySet()) {
                RunnerApi.TimerFamilySpec spec = translateTimerFamilySpec(DoFnSignatures.getTimerFamilySpecOrThrow(timerFamily.getValue(), doFn), newComponents, keyCoder, windowCoder);
                timerFamilySpecs.put(timerFamily.getKey(), spec);
            }
            String onWindowExpirationTimerFamilySpec = null;
            if (signature.onWindowExpiration() != null) {
                RunnerApi.TimerFamilySpec spec = RunnerApi.TimerFamilySpec.newBuilder().setTimeDomain(translateTimeDomain(TimeDomain.EVENT_TIME)).setTimerFamilyCoderId(registerCoderOrThrow(components, Timer.Coder.of(keyCoder, windowCoder))).build();
                for (int i = 0; i < Integer.MAX_VALUE; ++i) {
                    onWindowExpirationTimerFamilySpec = "onWindowExpiration" + i;
                    if (!timerFamilySpecs.containsKey(onWindowExpirationTimerFamilySpec)) {
                        break;
                    }
                }
                timerFamilySpecs.put(onWindowExpirationTimerFamilySpec, spec);
            }
            return ParDoLikeTimerFamilySpecs.create(timerFamilySpecs, onWindowExpirationTimerFamilySpec);
        }

        @Override
        public boolean isStateful() {
            return !signature.stateDeclarations().isEmpty() || !signature.timerDeclarations().isEmpty() || !signature.timerFamilyDeclarations().isEmpty() || signature.onWindowExpiration() != null;
        }

        @Override
        public boolean isSplittable() {
            return signature.processElement().isSplittable();
        }

        @Override
        public boolean isRequiresStableInput() {
            return signature.processElement().requiresStableInput();
        }

        @Override
        public boolean isRequiresTimeSortedInput() {
            return signature.processElement().requiresTimeSortedInput();
        }

        @Override
        public boolean requestsFinalization() {
            return (signature.startBundle() != null && signature.startBundle().extraParameters().contains(Parameter.bundleFinalizer())) || (signature.processElement() != null && signature.processElement().extraParameters().contains(Parameter.bundleFinalizer())) || (signature.finishBundle() != null && signature.finishBundle().extraParameters().contains(Parameter.bundleFinalizer()));
        }

        @Override
        public String translateRestrictionCoderId(SdkComponents newComponents) {
            return restrictionCoderId;
        }
    }, components);
}
Also used : KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) KvCoder(org.apache.beam.sdk.coders.KvCoder) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) IOException(java.io.IOException) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) StateSpec(org.apache.beam.sdk.state.StateSpec) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Map(java.util.Map) HashMap(java.util.HashMap) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Example 5 with ParDoPayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload in project beam by apache.

the class ParDoTranslation method getMainInputName.

/**
 * Returns the name of the main input of the ptransform.
 */
public static String getMainInputName(RunnerApi.PTransformOrBuilder ptransform) throws IOException {
    checkArgument(PAR_DO_TRANSFORM_URN.equals(ptransform.getSpec().getUrn()) || SPLITTABLE_PAIR_WITH_RESTRICTION_URN.equals(ptransform.getSpec().getUrn()) || SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN.equals(ptransform.getSpec().getUrn()) || SPLITTABLE_TRUNCATE_SIZED_RESTRICTION_URN.equals(ptransform.getSpec().getUrn()) || SPLITTABLE_PROCESS_ELEMENTS_URN.equals(ptransform.getSpec().getUrn()) || SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN.equals(ptransform.getSpec().getUrn()), "Unexpected payload type %s", ptransform.getSpec().getUrn());
    ParDoPayload payload = ParDoPayload.parseFrom(ptransform.getSpec().getPayload());
    return getMainInputName(ptransform, payload);
}
Also used : ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload)

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)13 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)11 ParDoPayload (org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload)10 Map (java.util.Map)8 ArrayList (java.util.ArrayList)6 Pipeline (org.apache.beam.sdk.Pipeline)6 PCollectionView (org.apache.beam.sdk.values.PCollectionView)6 Test (org.junit.Test)6 IOException (java.io.IOException)5 HashMap (java.util.HashMap)5 InvalidProtocolBufferException (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException)5 DoFn (org.apache.beam.sdk.transforms.DoFn)4 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)4 ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction)3 ExpansionApi (org.apache.beam.model.expansion.v1.ExpansionApi)3 ProcessBundleDescriptor (org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor)3 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)3 FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec)3 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)3 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)3