Search in sources :

Example 21 with Pipeline

use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

the class ParDoTranslation method translateParDo.

public static ParDoPayload translateParDo(AppliedPTransform<?, ?, ParDo.MultiOutput<?, ?>> appliedPTransform, SdkComponents components) throws IOException {
    final ParDo.MultiOutput<?, ?> parDo = appliedPTransform.getTransform();
    final Pipeline pipeline = appliedPTransform.getPipeline();
    final DoFn<?, ?> doFn = parDo.getFn();
    // Get main input.
    Set<String> allInputs = appliedPTransform.getInputs().keySet().stream().map(TupleTag::getId).collect(Collectors.toSet());
    Set<String> sideInputs = parDo.getSideInputs().values().stream().map(s -> s.getTagInternal().getId()).collect(Collectors.toSet());
    String mainInputName = Iterables.getOnlyElement(Sets.difference(allInputs, sideInputs));
    PCollection<?> mainInput = (PCollection<?>) appliedPTransform.getInputs().get(new TupleTag<>(mainInputName));
    final DoFnSchemaInformation doFnSchemaInformation = ParDo.getDoFnSchemaInformation(doFn, mainInput);
    return translateParDo((ParDo.MultiOutput) parDo, mainInput, doFnSchemaInformation, pipeline, components);
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) StateDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.StateDeclaration) DoFnSignatures.getTimerSpecOrThrow(org.apache.beam.sdk.transforms.reflect.DoFnSignatures.getTimerSpecOrThrow) Parameter(org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) DoFnSignatures(org.apache.beam.sdk.transforms.reflect.DoFnSignatures) Sets(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Sets) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) Map(java.util.Map) Iterables(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) DoFnInvoker(org.apache.beam.sdk.transforms.reflect.DoFnInvoker) SPLITTABLE_PROCESS_ELEMENTS_URN(org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_PROCESS_ELEMENTS_URN) SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN(org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_SPLIT_AND_SIZE_RESTRICTIONS_URN) KvCoder(org.apache.beam.sdk.coders.KvCoder) PAR_DO_TRANSFORM_URN(org.apache.beam.runners.core.construction.PTransformTranslation.PAR_DO_TRANSFORM_URN) Set(java.util.Set) DoFnWithExecutionInformation(org.apache.beam.sdk.util.DoFnWithExecutionInformation) StandardUserStateTypes(org.apache.beam.model.pipeline.v1.RunnerApi.StandardUserStateTypes) Collectors(java.util.stream.Collectors) SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN(org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS_URN) TransformTranslator(org.apache.beam.runners.core.construction.PTransformTranslation.TransformTranslator) List(java.util.List) StandardRequirements(org.apache.beam.model.pipeline.v1.RunnerApi.StandardRequirements) ParDo(org.apache.beam.sdk.transforms.ParDo) SerializableUtils(org.apache.beam.sdk.util.SerializableUtils) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting) AutoValue(com.google.auto.value.AutoValue) DoFnInvokers(org.apache.beam.sdk.transforms.reflect.DoFnInvokers) DoFnSignatures.getStateSpecOrThrow(org.apache.beam.sdk.transforms.reflect.DoFnSignatures.getStateSpecOrThrow) SideInput(org.apache.beam.model.pipeline.v1.RunnerApi.SideInput) Preconditions.checkNotNull(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkNotNull) Combine(org.apache.beam.sdk.transforms.Combine) Coder(org.apache.beam.sdk.coders.Coder) HashMap(java.util.HashMap) TupleTagList(org.apache.beam.sdk.values.TupleTagList) ArrayList(java.util.ArrayList) PTransform(org.apache.beam.sdk.transforms.PTransform) SPLITTABLE_PAIR_WITH_RESTRICTION_URN(org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_PAIR_WITH_RESTRICTION_URN) WindowMappingFn(org.apache.beam.sdk.transforms.windowing.WindowMappingFn) ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) MultiOutput(org.apache.beam.sdk.transforms.ParDo.MultiOutput) TimerSpec(org.apache.beam.sdk.state.TimerSpec) ViewFn(org.apache.beam.sdk.transforms.ViewFn) TupleTag(org.apache.beam.sdk.values.TupleTag) Preconditions.checkArgument(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkArgument) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Pipeline(org.apache.beam.sdk.Pipeline) Nullable(javax.annotation.Nullable) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature) IOException(java.io.IOException) PCollection(org.apache.beam.sdk.values.PCollection) TransformPayloadTranslator(org.apache.beam.runners.core.construction.PTransformTranslation.TransformPayloadTranslator) StateSpecs(org.apache.beam.sdk.state.StateSpecs) TimerDeclaration(org.apache.beam.sdk.transforms.reflect.DoFnSignature.TimerDeclaration) Preconditions.checkState(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Preconditions.checkState) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) SPLITTABLE_TRUNCATE_SIZED_RESTRICTION_URN(org.apache.beam.runners.core.construction.PTransformTranslation.SPLITTABLE_TRUNCATE_SIZED_RESTRICTION_URN) Collections(java.util.Collections) TimeDomain(org.apache.beam.sdk.state.TimeDomain) BeamUrns.getUrn(org.apache.beam.runners.core.construction.BeamUrns.getUrn) PCollection(org.apache.beam.sdk.values.PCollection) DoFnSchemaInformation(org.apache.beam.sdk.transforms.DoFnSchemaInformation) ParDo(org.apache.beam.sdk.transforms.ParDo) TupleTag(org.apache.beam.sdk.values.TupleTag) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Pipeline(org.apache.beam.sdk.Pipeline)

Example 22 with Pipeline

use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

the class ParDoTranslation method translateParDo.

/**
 * Translate a ParDo.
 */
public static <InputT> ParDoPayload translateParDo(ParDo.MultiOutput<InputT, ?> parDo, PCollection<InputT> mainInput, DoFnSchemaInformation doFnSchemaInformation, Pipeline pipeline, SdkComponents components) throws IOException {
    final DoFn<?, ?> doFn = parDo.getFn();
    final DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
    final String restrictionCoderId;
    if (signature.processElement().isSplittable()) {
        DoFnInvoker<?, ?> doFnInvoker = DoFnInvokers.invokerFor(doFn);
        final Coder<?> restrictionAndWatermarkStateCoder = KvCoder.of(doFnInvoker.invokeGetRestrictionCoder(pipeline.getCoderRegistry()), doFnInvoker.invokeGetWatermarkEstimatorStateCoder(pipeline.getCoderRegistry()));
        restrictionCoderId = components.registerCoder(restrictionAndWatermarkStateCoder);
    } else {
        restrictionCoderId = "";
    }
    Coder<BoundedWindow> windowCoder = (Coder<BoundedWindow>) mainInput.getWindowingStrategy().getWindowFn().windowCoder();
    Coder<?> keyCoder;
    if (signature.usesState() || signature.usesTimers()) {
        checkArgument(mainInput.getCoder() instanceof KvCoder, "DoFn's that use state or timers must have an input PCollection with a KvCoder but received %s", mainInput.getCoder());
        keyCoder = ((KvCoder) mainInput.getCoder()).getKeyCoder();
    } else {
        keyCoder = null;
    }
    return payloadForParDoLike(new ParDoLike() {

        @Override
        public FunctionSpec translateDoFn(SdkComponents newComponents) {
            return ParDoTranslation.translateDoFn(parDo.getFn(), parDo.getMainOutputTag(), parDo.getSideInputs(), doFnSchemaInformation, newComponents);
        }

        @Override
        public Map<String, SideInput> translateSideInputs(SdkComponents components) {
            Map<String, SideInput> sideInputs = new HashMap<>();
            for (PCollectionView<?> sideInput : parDo.getSideInputs().values()) {
                sideInputs.put(sideInput.getTagInternal().getId(), translateView(sideInput, components));
            }
            return sideInputs;
        }

        @Override
        public Map<String, RunnerApi.StateSpec> translateStateSpecs(SdkComponents components) throws IOException {
            Map<String, RunnerApi.StateSpec> stateSpecs = new HashMap<>();
            for (Map.Entry<String, StateDeclaration> state : signature.stateDeclarations().entrySet()) {
                RunnerApi.StateSpec spec = translateStateSpec(getStateSpecOrThrow(state.getValue(), doFn), components);
                stateSpecs.put(state.getKey(), spec);
            }
            return stateSpecs;
        }

        @Override
        public ParDoLikeTimerFamilySpecs translateTimerFamilySpecs(SdkComponents newComponents) {
            Map<String, RunnerApi.TimerFamilySpec> timerFamilySpecs = new HashMap<>();
            for (Map.Entry<String, TimerDeclaration> timer : signature.timerDeclarations().entrySet()) {
                RunnerApi.TimerFamilySpec spec = translateTimerFamilySpec(getTimerSpecOrThrow(timer.getValue(), doFn), newComponents, keyCoder, windowCoder);
                timerFamilySpecs.put(timer.getKey(), spec);
            }
            for (Map.Entry<String, DoFnSignature.TimerFamilyDeclaration> timerFamily : signature.timerFamilyDeclarations().entrySet()) {
                RunnerApi.TimerFamilySpec spec = translateTimerFamilySpec(DoFnSignatures.getTimerFamilySpecOrThrow(timerFamily.getValue(), doFn), newComponents, keyCoder, windowCoder);
                timerFamilySpecs.put(timerFamily.getKey(), spec);
            }
            String onWindowExpirationTimerFamilySpec = null;
            if (signature.onWindowExpiration() != null) {
                RunnerApi.TimerFamilySpec spec = RunnerApi.TimerFamilySpec.newBuilder().setTimeDomain(translateTimeDomain(TimeDomain.EVENT_TIME)).setTimerFamilyCoderId(registerCoderOrThrow(components, Timer.Coder.of(keyCoder, windowCoder))).build();
                for (int i = 0; i < Integer.MAX_VALUE; ++i) {
                    onWindowExpirationTimerFamilySpec = "onWindowExpiration" + i;
                    if (!timerFamilySpecs.containsKey(onWindowExpirationTimerFamilySpec)) {
                        break;
                    }
                }
                timerFamilySpecs.put(onWindowExpirationTimerFamilySpec, spec);
            }
            return ParDoLikeTimerFamilySpecs.create(timerFamilySpecs, onWindowExpirationTimerFamilySpec);
        }

        @Override
        public boolean isStateful() {
            return !signature.stateDeclarations().isEmpty() || !signature.timerDeclarations().isEmpty() || !signature.timerFamilyDeclarations().isEmpty() || signature.onWindowExpiration() != null;
        }

        @Override
        public boolean isSplittable() {
            return signature.processElement().isSplittable();
        }

        @Override
        public boolean isRequiresStableInput() {
            return signature.processElement().requiresStableInput();
        }

        @Override
        public boolean isRequiresTimeSortedInput() {
            return signature.processElement().requiresTimeSortedInput();
        }

        @Override
        public boolean requestsFinalization() {
            return (signature.startBundle() != null && signature.startBundle().extraParameters().contains(Parameter.bundleFinalizer())) || (signature.processElement() != null && signature.processElement().extraParameters().contains(Parameter.bundleFinalizer())) || (signature.finishBundle() != null && signature.finishBundle().extraParameters().contains(Parameter.bundleFinalizer()));
        }

        @Override
        public String translateRestrictionCoderId(SdkComponents newComponents) {
            return restrictionCoderId;
        }
    }, components);
}
Also used : KvCoder(org.apache.beam.sdk.coders.KvCoder) Coder(org.apache.beam.sdk.coders.Coder) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) KvCoder(org.apache.beam.sdk.coders.KvCoder) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) IOException(java.io.IOException) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) StateSpec(org.apache.beam.sdk.state.StateSpec) PCollectionView(org.apache.beam.sdk.values.PCollectionView) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Map(java.util.Map) HashMap(java.util.HashMap) DoFnSignature(org.apache.beam.sdk.transforms.reflect.DoFnSignature)

Example 23 with Pipeline

use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

the class FusedPipeline method toPipeline.

/**
 * Returns the {@link RunnerApi.Pipeline} representation of this {@link FusedPipeline}.
 *
 * <p>The {@link Components} of the returned pipeline will contain all of the {@link PTransform
 * PTransforms} present in the original Pipeline that this {@link FusedPipeline} was created from,
 * plus all of the {@link ExecutableStage ExecutableStages} contained within this {@link
 * FusedPipeline}. The {@link Pipeline#getRootTransformIdsList()} will contain all of the runner
 * executed transforms and all of the {@link ExecutableStage execuable stages} contained within
 * the Pipeline.
 */
public RunnerApi.Pipeline toPipeline() {
    Map<String, PTransform> executableStageTransforms = getEnvironmentExecutedTransforms();
    Set<String> executableTransformIds = Sets.union(executableStageTransforms.keySet(), getRunnerExecutedTransforms().stream().map(PTransformNode::getId).collect(Collectors.toSet()));
    // Augment the initial transforms with all of the executable transforms.
    Components fusedComponents = getComponents().toBuilder().putAllTransforms(executableStageTransforms).build();
    List<String> rootTransformIds = StreamSupport.stream(QueryablePipeline.forTransforms(executableTransformIds, fusedComponents).getTopologicallyOrderedTransforms().spliterator(), false).map(PTransformNode::getId).collect(Collectors.toList());
    Pipeline res = Pipeline.newBuilder().setComponents(fusedComponents).addAllRootTransformIds(rootTransformIds).addAllRequirements(getRequirements()).build();
    // Validate that fusion didn't produce a malformed pipeline.
    PipelineValidator.validate(res);
    return res;
}
Also used : SyntheticComponents(org.apache.beam.runners.core.construction.SyntheticComponents) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Pipeline(org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline)

Example 24 with Pipeline

use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

the class GreedyStageFuser method getStageEnvironment.

private static Environment getStageEnvironment(QueryablePipeline pipeline, Set<PTransformNode> initialNodes) {
    Supplier<IllegalArgumentException> missingEnv = () -> new IllegalArgumentException(String.format("%s must be populated on all %s in a %s", Environment.class.getSimpleName(), PTransformNode.class.getSimpleName(), GreedyStageFuser.class.getSimpleName()));
    Environment env = pipeline.getEnvironment(initialNodes.iterator().next()).orElseThrow(missingEnv);
    initialNodes.forEach(transformNode -> checkArgument(env.equals(pipeline.getEnvironment(transformNode).orElseThrow(missingEnv)), "All %s in a %s must be the same. Got %s and %s", Environment.class.getSimpleName(), ExecutableStage.class.getSimpleName(), env, pipeline.getEnvironment(transformNode).get()));
    return env;
}
Also used : Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment)

Example 25 with Pipeline

use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

the class ExecutableStageTranslationTest method testOperatorNameGeneration.

@Test
public /* Test for generating readable operator names during translation. */
void testOperatorNameGeneration() throws Exception {
    Pipeline p = Pipeline.create();
    p.apply(Impulse.create()).apply(ParDo.of(new DoFn<byte[], String>() {

        @ProcessElement
        public void processElement(ProcessContext processContext, OutputReceiver<String> outputReceiver) {
        }
    })).apply("MyName", ParDo.of(new DoFn<String, Integer>() {

        @ProcessElement
        public void processElement(ProcessContext processContext, OutputReceiver<Integer> outputReceiver) {
        }
    })).apply(// Avoid nested Anonymous ParDo
    "Composite/Nested/ParDo", ParDo.of(new DoFn<Integer, Integer>() {

        @ProcessElement
        public void processElement(ProcessContext processContext, OutputReceiver<Integer> outputReceiver) {
        }
    }));
    ExecutableStage firstEnvStage = GreedyPipelineFuser.fuse(PipelineTranslation.toProto(p)).getFusedStages().stream().findFirst().get();
    RunnerApi.ExecutableStagePayload basePayload = RunnerApi.ExecutableStagePayload.parseFrom(firstEnvStage.toPTransform("foo").getSpec().getPayload());
    String executableStageName = ExecutableStageTranslation.generateNameFromStagePayload(basePayload);
    assertThat(executableStageName, is("[3]{ParDo(Anonymous), MyName, Composite}"));
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) ExecutableStage(org.apache.beam.runners.core.construction.graph.ExecutableStage) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)117 Test (org.junit.Test)87 Pipeline (org.apache.beam.sdk.Pipeline)82 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)44 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)43 DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)38 Map (java.util.Map)32 KV (org.apache.beam.sdk.values.KV)26 Job (com.google.api.services.dataflow.model.Job)25 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)24 KvCoder (org.apache.beam.sdk.coders.KvCoder)24 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)23 Coder (org.apache.beam.sdk.coders.Coder)23 ArrayList (java.util.ArrayList)22 WindowedValue (org.apache.beam.sdk.util.WindowedValue)22 HashMap (java.util.HashMap)20 List (java.util.List)20 ExecutableStage (org.apache.beam.runners.core.construction.graph.ExecutableStage)19 IOException (java.io.IOException)18 PCollection (org.apache.beam.sdk.values.PCollection)18