Search in sources :

Example 1 with ParDoPayload

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.

the class ProcessBundleHandlerTest method testOrderOfSetupTeardownCalls.

@Test
public void testOrderOfSetupTeardownCalls() throws Exception {
    DoFnWithExecutionInformation doFnWithExecutionInformation = DoFnWithExecutionInformation.of(new TestDoFn(), TestDoFn.mainOutput, Collections.emptyMap(), DoFnSchemaInformation.create());
    RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder().setUrn(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN).setPayload(ByteString.copyFrom(SerializableUtils.serializeToByteArray(doFnWithExecutionInformation))).build();
    RunnerApi.ParDoPayload parDoPayload = RunnerApi.ParDoPayload.newBuilder().setDoFn(functionSpec).build();
    BeamFnApi.ProcessBundleDescriptor processBundleDescriptor = BeamFnApi.ProcessBundleDescriptor.newBuilder().putTransforms("2L", PTransform.newBuilder().setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(DATA_INPUT_URN).build()).putOutputs("2L-output", "2L-output-pc").build()).putTransforms("3L", PTransform.newBuilder().setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(parDoPayload.toByteString())).putInputs("3L-input", "2L-output-pc").build()).putPcollections("2L-output-pc", PCollection.newBuilder().setWindowingStrategyId("window-strategy").setCoderId("2L-output-coder").setIsBounded(IsBounded.Enum.BOUNDED).build()).putWindowingStrategies("window-strategy", WindowingStrategy.newBuilder().setWindowCoderId("window-strategy-coder").setWindowFn(RunnerApi.FunctionSpec.newBuilder().setUrn("beam:window_fn:global_windows:v1")).setOutputTime(RunnerApi.OutputTime.Enum.END_OF_WINDOW).setAccumulationMode(RunnerApi.AccumulationMode.Enum.ACCUMULATING).setTrigger(RunnerApi.Trigger.newBuilder().setAlways(RunnerApi.Trigger.Always.getDefaultInstance())).setClosingBehavior(RunnerApi.ClosingBehavior.Enum.EMIT_ALWAYS).setOnTimeBehavior(RunnerApi.OnTimeBehavior.Enum.FIRE_ALWAYS).build()).putCoders("2L-output-coder", CoderTranslation.toProto(StringUtf8Coder.of()).getCoder()).putCoders("window-strategy-coder", Coder.newBuilder().setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(ModelCoders.GLOBAL_WINDOW_CODER_URN).build()).build()).build();
    Map<String, BeamFnApi.ProcessBundleDescriptor> fnApiRegistry = ImmutableMap.of("1L", processBundleDescriptor);
    Map<String, PTransformRunnerFactory> urnToPTransformRunnerFactoryMap = Maps.newHashMap(REGISTERED_RUNNER_FACTORIES);
    urnToPTransformRunnerFactoryMap.put(DATA_INPUT_URN, (context) -> null);
    ProcessBundleHandler handler = new ProcessBundleHandler(PipelineOptionsFactory.create(), Collections.emptySet(), fnApiRegistry::get, beamFnDataClient, null, /* beamFnStateClient */
    null, /* finalizeBundleHandler */
    new ShortIdMap(), urnToPTransformRunnerFactoryMap, Caches.noop(), new BundleProcessorCache());
    handler.processBundle(BeamFnApi.InstructionRequest.newBuilder().setInstructionId("998L").setProcessBundle(BeamFnApi.ProcessBundleRequest.newBuilder().setProcessBundleDescriptorId("1L")).build());
    handler.processBundle(BeamFnApi.InstructionRequest.newBuilder().setInstructionId("999L").setProcessBundle(BeamFnApi.ProcessBundleRequest.newBuilder().setProcessBundleDescriptorId("1L")).build());
    handler.shutdown();
    // setup and teardown should occur only once when processing multiple bundles for the same
    // descriptor
    assertThat(TestDoFn.orderOfOperations, contains("setUp", "startBundle", "finishBundle", "startBundle", "finishBundle", "tearDown"));
}
Also used : ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) BeamFnApi(org.apache.beam.model.fnexecution.v1.BeamFnApi) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) BundleProcessorCache(org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessorCache) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) DoFnWithExecutionInformation(org.apache.beam.sdk.util.DoFnWithExecutionInformation) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) ShortIdMap(org.apache.beam.runners.core.metrics.ShortIdMap) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PTransformRunnerFactory(org.apache.beam.fn.harness.PTransformRunnerFactory) Test(org.junit.Test)

Example 2 with ParDoPayload

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.

the class ParDoTranslation method translateParDo.

public static ParDoPayload translateParDo(AppliedPTransform<?, ?, ParDo.MultiOutput<?, ?>> appliedPTransform, SdkComponents components) throws IOException {
    final ParDo.MultiOutput<?, ?> parDo = appliedPTransform.getTransform();
    final Pipeline pipeline = appliedPTransform.getPipeline();
    final DoFn<?, ?> doFn = parDo.getFn();
    // Get main input.
    Set<String> allInputs = appliedPTransform.getInputs().keySet().stream().map(TupleTag::getId).collect(Collectors.toSet());
    Set<String> sideInputs = parDo.getSideInputs().values().stream().map(s -> s.getTagInternal().getId()).collect(Collectors.toSet());
    String mainInputName = Iterables.getOnlyElement(Sets.difference(allInputs, sideInputs));
    PCollection<?> mainInput = (PCollection<?>) appliedPTransform.getInputs().get(new TupleTag<>(mainInputName));
    final DoFnSchemaInformation doFnSchemaInformation = ParDo.getDoFnSchemaInformation(doFn, mainInput);
    return translateParDo((ParDo.MultiOutput) parDo, mainInput, doFnSchemaInformation, pipeline, components);
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) StateDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.StateDeclaration) DoFnSignatures.getTimerSpecOrThrow(org.apache.beam.sdk.transforms.reflect.DoFnSignatures.getTimerSpecOrThrow) Parameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) DoFnSignatures(org.apache.beam.sdk.transforms.reflect.DoFnSignatures) Sets(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) DoFnInvoker(org.apache.beam.sdk.transforms.reflect.DoFnInvoker) SPLITTABLE_PROCESS_ELEMENTS_URN(org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_PROCESS_ELEMENTS_URN) SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN(org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN) KvCoder(org.apache.beam.sdk.coders.KvCoder) PAR_DO_TRANSFORM_URN(org.apache.beam.runners.core.construction.PTransformTranslation.PAR_DO_TRANSFORM_URN) Set(java.util.Set) DoFnWithExecutionInformation(org.apache.beam.sdk.util.DoFnWithExecutionInformation) StandardUserStateTypes(org.apache.beam.model.pipeline.v1.RunnerApi.StandardUserStateTypes) Collectors(java.util.stream.Collectors) SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN(org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN) TransformTranslator(org.apache.beam.runners.core.construction.PTransformTranslation.TransformTranslator) List(java.util.List) StandardRequirements(org.apache.beam.model.pipeline.v1.RunnerApi.StandardRequirements) ParDo(org.apache.beam.sdk.transforms.ParDo) SerializableUtils(org.apache.beam.sdk.util.SerializableUtils) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting) AutoValue(com.google.auto.value.AutoValue) DoFnInvokers(org.apache.beam.sdk.transforms.reflect.DoFnInvokers) DoFnSignatures.getStateSpecOrThrow(org.apache.beam.sdk.transforms.reflect.DoFnSignatures.getStateSpecOrThrow) SideInput(org.apache.beam.model.pipeline.v1.RunnerApi.SideInput) Preconditions.checkNotNull(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull) Combine(org.apache.beam.sdk.transforms.Combine) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) TupleTagList(org.apache.beam.sdk.values.TupleTagList) ArrayList(java.util.ArrayList) PTransform(org.apache.beam.sdk.transforms.PTransform) SPLITTABLE_PAIR_WITH_RESTRICTION_URN(org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_PAIR_WITH_RESTRICTION_URN) WindowMappingFn(org.apache.beam.sdk.transforms.windowing.WindowMappingFn) ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) MultiOutput(org.apache.beam.sdk.transforms.ParDo.MultiOutput) TimerSpec(org.apache.beam.sdk.state.TimerSpec) ViewFn(org.apache.beam.sdk.transforms.ViewFn) TupleTag(org.apache.beam.sdk.values.TupleTag) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Pipeline(org.apache.beam.sdk.Pipeline) Nullable(javax.annotation.Nullable) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature) IOException(java.io.IOException) PCollection(org.apache.beam.sdk.values.PCollection) TransformPayloadTranslator(org.apache.beam.runners.core.construction.PTransformTranslation.TransformPayloadTranslator) StateSpecs(org.apache.beam.sdk.state.StateSpecs) TimerDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.TimerDeclaration) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) SPLITTABLE_TRUNCATE_SIZED_RESTRICTION_URN(org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_TRUNCATE_SIZED_RESTRICTION_URN) Collections(java.util.Collections) TimeDomain(org.apache.beam.sdk.state.TimeDomain) BeamUrns.getUrn(org.apache.beam.runners.core.construction.BeamUrns.getUrn) PCollection(org.apache.beam.sdk.values.PCollection) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) ParDo(org.apache.beam.sdk.transforms.ParDo) TupleTag(org.apache.beam.sdk.values.TupleTag) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Pipeline(org.apache.beam.sdk.Pipeline)

Example 3 with ParDoPayload

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.

the class ParDoTranslation method getMainInputName.

/**
 * Returns the name of the main input of the ptransform.
 */
public static String getMainInputName(RunnerApi.PTransformOrBuilder ptransform) throws IOException {
    checkArgument(PAR_DO_TRANSFORM_URN.equals(ptransform.getSpec().getUrn()) || SPLITTABLE_PAIR_WITH_RESTRICTION_URN.equals(ptransform.getSpec().getUrn()) || SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN.equals(ptransform.getSpec().getUrn()) || SPLITTABLE_TRUNCATE_SIZED_RESTRICTION_URN.equals(ptransform.getSpec().getUrn()) || SPLITTABLE_PROCESS_ELEMENTS_URN.equals(ptransform.getSpec().getUrn()) || SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN.equals(ptransform.getSpec().getUrn()), "Unexpected payload type %s", ptransform.getSpec().getUrn());
    ParDoPayload payload = ParDoPayload.parseFrom(ptransform.getSpec().getPayload());
    return getMainInputName(ptransform, payload);
}
Also used : ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload)

Example 4 with ParDoPayload

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.

the class PipelineValidator method validateParDo.

private static void validateParDo(String id, PTransform transform, Components components, Set<String> requirements) throws Exception {
    ParDoPayload payload = ParDoPayload.parseFrom(transform.getSpec().getPayload());
    // side_inputs
    for (String sideInputId : payload.getSideInputsMap().keySet()) {
        checkArgument(transform.containsInputs(sideInputId), "Transform %s side input %s is not listed in the transform's inputs", id, sideInputId);
    }
    if (payload.getStateSpecsCount() > 0 || payload.getTimerFamilySpecsCount() > 0) {
        checkArgument(requirements.contains(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN));
    // TODO: Validate state_specs and timer_specs
    }
    if (!payload.getRestrictionCoderId().isEmpty()) {
        checkArgument(components.containsCoders(payload.getRestrictionCoderId()));
        checkArgument(requirements.contains(ParDoTranslation.REQUIRES_SPLITTABLE_DOFN_URN));
    }
    if (payload.getRequestsFinalization()) {
        checkArgument(requirements.contains(ParDoTranslation.REQUIRES_BUNDLE_FINALIZATION_URN));
    }
    if (payload.getRequiresStableInput()) {
        checkArgument(requirements.contains(ParDoTranslation.REQUIRES_STABLE_INPUT_URN));
    }
    if (payload.getRequiresTimeSortedInput()) {
        checkArgument(requirements.contains(ParDoTranslation.REQUIRES_TIME_SORTED_INPUT_URN));
    }
}
Also used : ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload)

Example 5 with ParDoPayload

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload in project beam by apache.

the class EnvironmentsTest method getEnvironmentPTransform.

@Test
public void getEnvironmentPTransform() throws IOException {
    Pipeline p = Pipeline.create();
    SdkComponents components = SdkComponents.create();
    Environment env = Environments.createDockerEnvironment("java");
    components.registerEnvironment(env);
    ParDoPayload payload = ParDoTranslation.translateParDo(ParDo.of(new DoFn<String, String>() {

        @ProcessElement
        public void process(ProcessContext ctxt) {
        }
    }).withOutputTags(new TupleTag<>(), TupleTagList.empty()), PCollection.createPrimitiveOutputInternal(p, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, StringUtf8Coder.of()), DoFnSchemaInformation.create(), Pipeline.create(), components);
    RehydratedComponents rehydratedComponents = RehydratedComponents.forComponents(components.toComponents());
    PTransform ptransform = PTransform.newBuilder().setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(payload.toByteString()).build()).setEnvironmentId(components.getOnlyEnvironmentId()).build();
    Environment env1 = Environments.getEnvironment(ptransform, rehydratedComponents).get();
    assertThat(env1, equalTo(components.toComponents().getEnvironmentsOrThrow(ptransform.getEnvironmentId())));
}
Also used : ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) TupleTag(org.apache.beam.sdk.values.TupleTag) Pipeline(org.apache.beam.sdk.Pipeline) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Aggregations

ParDoPayload (org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload)10 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)6 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)6 ArrayList (java.util.ArrayList)5 Map (java.util.Map)4 TupleTag (org.apache.beam.sdk.values.TupleTag)4 ByteString (com.google.protobuf.ByteString)2 IOException (java.io.IOException)2 Collections (java.util.Collections)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Set (java.util.Set)2 PTransformRunnerFactory (org.apache.beam.fn.harness.PTransformRunnerFactory)2 BundleProcessorCache (org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessorCache)2 BeamFnApi (org.apache.beam.model.fnexecution.v1.BeamFnApi)2 FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec)2 Pipeline (org.apache.beam.sdk.Pipeline)2 ParDoPayload (org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload)2 MultiOutput (org.apache.beam.sdk.transforms.ParDo.MultiOutput)2 DoFnWithExecutionInformation (org.apache.beam.sdk.util.DoFnWithExecutionInformation)2