Search in sources :

Example 1 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class SparkStreamingPortablePipelineTranslator method translateExecutableStage.

/**
 * Translates one executable stage into DStream operations registered on {@code context}.
 *
 * <p>The stage payload is parsed from the transform's spec, the input dataset named by the
 * payload is popped from the context, and a {@link SparkExecutableStageFunction} is applied to
 * every partition of its DStream. One dataset is pushed per value of the transform's output map;
 * when that map is empty, a single empty sink dataset is pushed instead.
 *
 * @param transformNode node whose transform spec carries the executable-stage payload
 * @param pipeline pipeline proto whose components are used to look up the window coder
 * @param context translation context that datasets are popped from and pushed to
 * @throws RuntimeException if the payload cannot be parsed (wraps the {@link IOException})
 * @throws UnsupportedOperationException if the stage payload declares any side inputs
 */
private static <InputT, OutputT, SideInputT> void translateExecutableStage(PTransformNode transformNode, RunnerApi.Pipeline pipeline, SparkStreamingTranslationContext context) {
    RunnerApi.ExecutableStagePayload payload;
    try {
        payload = RunnerApi.ExecutableStagePayload.parseFrom(transformNode.getTransform().getSpec().getPayload());
    } catch (IOException parseFailure) {
        throw new RuntimeException(parseFailure);
    }

    // Resolve the stage input and keep hold of its stream sources for the datasets created below.
    String inputId = payload.getInput();
    UnboundedDataset<InputT> source = (UnboundedDataset<InputT>) context.popDataset(inputId);
    List<Integer> sourceIds = source.getStreamSources();
    JavaDStream<WindowedValue<InputT>> sourceStream = source.getDStream();

    Map<String, String> declaredOutputs = transformNode.getTransform().getOutputsMap();
    BiMap<String, Integer> outputIndexes = createOutputMap(declaredOutputs.values());
    RunnerApi.Components components = pipeline.getComponents();
    Coder windowCoder = getWindowingStrategy(inputId, components).getWindowFn().windowCoder();

    // TODO (BEAM-10712): handle side inputs.
    if (payload.getSideInputsCount() > 0) {
        throw new UnsupportedOperationException("Side inputs to executable stage are currently unsupported.");
    }
    // Side inputs are rejected above, so the broadcast map handed to the stage function is empty.
    ImmutableMap<String, Tuple2<Broadcast<List<byte[]>>, WindowedValue.WindowedValueCoder<SideInputT>>> noBroadcasts = ImmutableMap.of();

    SparkExecutableStageFunction<InputT, SideInputT> stageFn = new SparkExecutableStageFunction<>(
        context.getSerializableOptions(),
        payload,
        context.jobInfo,
        outputIndexes,
        SparkExecutableStageContextFactory.getInstance(),
        noBroadcasts,
        MetricsAccumulator.getInstance(),
        windowCoder);
    JavaDStream<RawUnionValue> stageResult = sourceStream.mapPartitions(stageFn);

    String intermediateId = getExecutableStageIntermediateId(transformNode);
    Dataset intermediate = new Dataset() {

        @Override
        public void cache(String storageLevel, Coder<?> coder) {
            stageResult.persist(StorageLevel.fromString(storageLevel));
        }

        @Override
        public void action() {
            // Empty function to force computation of RDD.
            stageResult.foreachRDD(TranslationUtils.emptyVoidFunction());
        }

        @Override
        public void setName(String name) {
            // Intentionally a no-op: the name is ignored.
        }
    };
    context.pushDataset(intermediateId, intermediate);
    // Pop dataset to mark DStream as used
    context.popDataset(intermediateId);

    // Register one dataset per declared output, extracted from the stage result by its index.
    for (String outputId : declaredOutputs.values()) {
        JavaDStream<WindowedValue<OutputT>> extracted = stageResult.flatMap(new SparkExecutableStageExtractionFunction<>(outputIndexes.get(outputId)));
        context.pushDataset(outputId, new UnboundedDataset<>(extracted, sourceIds));
    }

    // Add sink to ensure stage is executed
    if (declaredOutputs.isEmpty()) {
        JavaDStream<WindowedValue<OutputT>> sink = stageResult.flatMap(ignored -> Collections.emptyIterator());
        String sinkName = String.format("EmptyOutputSink_%d", context.nextSinkId());
        context.pushDataset(sinkName, new UnboundedDataset<>(sink, sourceIds));
    }
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) UnboundedDataset(org.apache.beam.runners.spark.translation.streaming.UnboundedDataset) WindowedValue(org.apache.beam.sdk.util.WindowedValue) List(java.util.List) ArrayList(java.util.ArrayList) StorageLevel(org.apache.spark.storage.StorageLevel) KvCoder(org.apache.beam.sdk.coders.KvCoder) PipelineTranslatorUtils.getWindowedValueCoder(org.apache.beam.runners.fnexecution.translation.PipelineTranslatorUtils.getWindowedValueCoder) Coder(org.apache.beam.sdk.coders.Coder) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) RawUnionValue(org.apache.beam.sdk.transforms.join.RawUnionValue) UnboundedDataset(org.apache.beam.runners.spark.translation.streaming.UnboundedDataset) IOException(java.io.IOException) Tuple2(scala.Tuple2)

Example 2 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class OutputDeduplicatorTest method unchangedWithNoDuplicates.

@Test
public void unchangedWithNoDuplicates() {
    /* Every PCollection here has exactly one producer (a single transform or a single stage), so
     * deduplication is expected to change nothing: the components come back equal to the input and
     * no stages or transforms are rewritten or introduced.
     *
     * Pipeline:
     *              /-> one -> .out \
     * red -> .out ->                -> blue -> .out
     *              \-> two -> .out /
     */
    FunctionSpec parDoSpec = FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).build();

    PCollection redOut = PCollection.newBuilder().setUniqueName("red.out").build();
    PCollection oneOut = PCollection.newBuilder().setUniqueName("one.out").build();
    PCollection twoOut = PCollection.newBuilder().setUniqueName("two.out").build();
    PCollection blueOut = PCollection.newBuilder().setUniqueName("blue.out").build();
    String redOutId = redOut.getUniqueName();
    String oneOutId = oneOut.getUniqueName();
    String twoOutId = twoOut.getUniqueName();
    String blueOutId = blueOut.getUniqueName();

    PTransform red = PTransform.newBuilder()
        .setSpec(parDoSpec)
        .putOutputs("out", redOutId)
        .build();
    PTransform one = PTransform.newBuilder()
        .setSpec(parDoSpec)
        .putInputs("in", redOutId)
        .putOutputs("out", oneOutId)
        .build();
    PTransform two = PTransform.newBuilder()
        .setSpec(parDoSpec)
        .putInputs("in", redOutId)
        .putOutputs("out", twoOutId)
        .build();
    PTransform blue = PTransform.newBuilder()
        .setSpec(parDoSpec)
        .putInputs("one", oneOutId)
        .putInputs("two", twoOutId)
        .putOutputs("out", blueOutId)
        .build();

    RunnerApi.Components components = Components.newBuilder()
        .putTransforms("one", one)
        .putPcollections(oneOutId, oneOut)
        .putTransforms("two", two)
        .putPcollections(twoOutId, twoOut)
        .putTransforms("red", red)
        .putPcollections(redOutId, redOut)
        .putTransforms("blue", blue)
        .putPcollections(blueOutId, blueOut)
        .build();

    // Each stage reads red.out and fuses exactly one of the two middle transforms.
    ExecutableStage oneStage = ImmutableExecutableStage.of(
        components,
        Environment.getDefaultInstance(),
        PipelineNode.pCollection(redOutId, redOut),
        ImmutableList.of(),
        ImmutableList.of(),
        ImmutableList.of(),
        ImmutableList.of(PipelineNode.pTransform("one", one)),
        ImmutableList.of(PipelineNode.pCollection(oneOutId, oneOut)),
        DEFAULT_WIRE_CODER_SETTINGS);
    ExecutableStage twoStage = ImmutableExecutableStage.of(
        components,
        Environment.getDefaultInstance(),
        PipelineNode.pCollection(redOutId, redOut),
        ImmutableList.of(),
        ImmutableList.of(),
        ImmutableList.of(),
        ImmutableList.of(PipelineNode.pTransform("two", two)),
        ImmutableList.of(PipelineNode.pCollection(twoOutId, twoOut)),
        DEFAULT_WIRE_CODER_SETTINGS);

    PTransformNode redNode = PipelineNode.pTransform("red", red);
    PTransformNode blueNode = PipelineNode.pTransform("blue", blue);
    QueryablePipeline queryable = QueryablePipeline.forPrimitivesIn(components);

    DeduplicationResult outcome = OutputDeduplicator.ensureSingleProducer(
        queryable,
        ImmutableList.of(oneStage, twoStage),
        ImmutableList.of(redNode, blueNode));

    assertThat(outcome.getDeduplicatedComponents(), equalTo(components));
    assertThat(outcome.getDeduplicatedStages().keySet(), empty());
    assertThat(outcome.getDeduplicatedTransforms().keySet(), empty());
    assertThat(outcome.getIntroducedTransforms(), empty());
}
Also used : PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DeduplicationResult(org.apache.beam.runners.core.construction.graph.OutputDeduplicator.DeduplicationResult) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 3 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class OutputDeduplicatorTest method multipleDuplicatesInStages.

@Test
public void multipleDuplicatesInStages() {
    /* When a single stage produces more than one duplicated PCollection, each of those
     * duplicates should be synthesized.
     *
     * Original Pipeline:
     * red -> .out ---> one -> .out -----\
     *             \                      -> shared.out
     *              \--> two -> .out ----|
     *               \                    -> otherShared -> .out
     *                \-> three --> .out /
     *
     * Fused Pipeline:
     *      -> .out [-> one -> .out -> shared -> .out] \
     *     /                                            -> blue -> .out
     *     |                        -> shared -> .out] /
     * red -> .out [-> two -> .out |
     *     |                        -> otherShared -> .out]
     *     \
     *      -> .out [-> three -> .out -> otherShared -> .out]
     *
     * Deduplicated Pipeline:
     *           [-> one -> .out -> shared -> .out:0] --\
     *           |                                       -> shared -> .out -> blue -> .out
     *           |                 -> shared -> .out:1] /
     * red -> .out [-> two -> .out |
     *           |                  -> otherShared -> .out:0] --\
     *           |                                               -> otherShared -> .out
     *           [-> three -> .out -> otherShared -> .out:1] ---/
     */
    FunctionSpec parDoSpec = FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).build();

    PCollection redOut = PCollection.newBuilder().setUniqueName("red.out").build();
    PCollection threeOut = PCollection.newBuilder().setUniqueName("three.out").build();
    PCollection oneOut = PCollection.newBuilder().setUniqueName("one.out").build();
    PCollection twoOut = PCollection.newBuilder().setUniqueName("two.out").build();
    PCollection sharedOut = PCollection.newBuilder().setUniqueName("shared.out").build();
    PCollection otherSharedOut = PCollection.newBuilder().setUniqueName("shared.out2").build();
    PCollection blueOut = PCollection.newBuilder().setUniqueName("blue.out").build();
    String redOutId = redOut.getUniqueName();
    String threeOutId = threeOut.getUniqueName();
    String oneOutId = oneOut.getUniqueName();
    String twoOutId = twoOut.getUniqueName();
    String sharedOutId = sharedOut.getUniqueName();
    String otherSharedOutId = otherSharedOut.getUniqueName();
    String blueOutId = blueOut.getUniqueName();

    PTransform red = PTransform.newBuilder()
        .setSpec(parDoSpec)
        .putOutputs("out", redOutId)
        .build();
    PTransform three = PTransform.newBuilder()
        .setSpec(parDoSpec)
        .putInputs("in", redOutId)
        .putOutputs("out", threeOutId)
        .build();
    PTransform one = PTransform.newBuilder()
        .setSpec(parDoSpec)
        .putInputs("in", redOutId)
        .putOutputs("out", oneOutId)
        .build();
    PTransform two = PTransform.newBuilder()
        .setSpec(parDoSpec)
        .putInputs("in", redOutId)
        .putOutputs("out", twoOutId)
        .build();
    PTransform shared = PTransform.newBuilder()
        .setSpec(parDoSpec)
        .putInputs("one", oneOutId)
        .putInputs("two", twoOutId)
        .putOutputs("shared", sharedOutId)
        .build();
    PTransform otherShared = PTransform.newBuilder()
        .setSpec(parDoSpec)
        .putInputs("multi", threeOutId)
        .putInputs("two", twoOutId)
        .putOutputs("out", otherSharedOutId)
        .build();
    PTransform blue = PTransform.newBuilder()
        .setSpec(parDoSpec)
        .putInputs("in", sharedOutId)
        .putOutputs("out", blueOutId)
        .build();

    // Note that the transform held in `three` is registered under the id "multi".
    RunnerApi.Components components = Components.newBuilder()
        .putTransforms("one", one)
        .putPcollections(oneOutId, oneOut)
        .putTransforms("two", two)
        .putPcollections(twoOutId, twoOut)
        .putTransforms("multi", three)
        .putPcollections(threeOutId, threeOut)
        .putTransforms("shared", shared)
        .putPcollections(sharedOutId, sharedOut)
        .putTransforms("otherShared", otherShared)
        .putPcollections(otherSharedOutId, otherSharedOut)
        .putTransforms("red", red)
        .putPcollections(redOutId, redOut)
        .putTransforms("blue", blue)
        .putPcollections(blueOutId, blueOut)
        .build();

    // Nodes that appear in more than one stage definition below.
    PCollectionNode redOutNode = PipelineNode.pCollection(redOutId, redOut);
    PCollectionNode sharedOutNode = PipelineNode.pCollection(sharedOutId, sharedOut);
    PCollectionNode otherSharedOutNode = PipelineNode.pCollection(otherSharedOutId, otherSharedOut);
    PTransformNode sharedNode = PipelineNode.pTransform("shared", shared);
    PTransformNode otherSharedNode = PipelineNode.pTransform("otherShared", otherShared);

    ExecutableStage multiStage = ImmutableExecutableStage.of(
        components,
        Environment.getDefaultInstance(),
        redOutNode,
        ImmutableList.of(),
        ImmutableList.of(),
        ImmutableList.of(),
        ImmutableList.of(PipelineNode.pTransform("multi", three), sharedNode, otherSharedNode),
        ImmutableList.of(sharedOutNode, otherSharedOutNode),
        DEFAULT_WIRE_CODER_SETTINGS);
    ExecutableStage oneStage = ImmutableExecutableStage.of(
        components,
        Environment.getDefaultInstance(),
        redOutNode,
        ImmutableList.of(),
        ImmutableList.of(),
        ImmutableList.of(),
        ImmutableList.of(PipelineNode.pTransform("one", one), sharedNode),
        ImmutableList.of(sharedOutNode),
        DEFAULT_WIRE_CODER_SETTINGS);
    ExecutableStage twoStage = ImmutableExecutableStage.of(
        components,
        Environment.getDefaultInstance(),
        redOutNode,
        ImmutableList.of(),
        ImmutableList.of(),
        ImmutableList.of(),
        ImmutableList.of(PipelineNode.pTransform("two", two), otherSharedNode),
        ImmutableList.of(otherSharedOutNode),
        DEFAULT_WIRE_CODER_SETTINGS);

    PTransformNode redNode = PipelineNode.pTransform("red", red);
    PTransformNode blueNode = PipelineNode.pTransform("blue", blue);
    QueryablePipeline queryable = QueryablePipeline.forPrimitivesIn(components);

    DeduplicationResult result = OutputDeduplicator.ensureSingleProducer(
        queryable,
        ImmutableList.of(oneStage, twoStage, multiStage),
        ImmutableList.of(redNode, blueNode));

    assertThat(result.getIntroducedTransforms(), hasSize(2));
    assertThat(result.getDeduplicatedStages().keySet(), containsInAnyOrder(multiStage, oneStage, twoStage));
    assertThat(result.getDeduplicatedTransforms().keySet(), empty());

    // The inputs of the introduced transforms must be exactly the outputs of the rewritten stages.
    Collection<String> introducedIds = result.getIntroducedTransforms().stream()
        .flatMap(node -> node.getTransform().getInputsMap().values().stream())
        .collect(Collectors.toList());
    String[] stageOutputs = result.getDeduplicatedStages().values().stream()
        .flatMap(stage -> stage.getOutputPCollections().stream().map(PCollectionNode::getId))
        .toArray(String[]::new);
    assertThat(introducedIds, containsInAnyOrder(stageOutputs));

    // The introduced PCollections and transforms must also be present in the resulting components.
    assertThat(
        result.getDeduplicatedComponents().getPcollectionsMap().keySet(),
        hasItems(introducedIds.toArray(new String[0])));
    assertThat(
        result.getDeduplicatedComponents().getTransformsMap().entrySet(),
        hasItems(result.getIntroducedTransforms().stream().collect(Collectors.toMap(PTransformNode::getId, PTransformNode::getTransform)).entrySet().toArray(new Map.Entry[0])));
}
Also used : PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) RunWith(org.junit.runner.RunWith) Matchers.hasItems(org.hamcrest.Matchers.hasItems) ArrayList(java.util.ArrayList) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) Map(java.util.Map) Matchers.hasSize(org.hamcrest.Matchers.hasSize) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Matchers.hasEntry(org.hamcrest.Matchers.hasEntry) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Matchers.empty(org.hamcrest.Matchers.empty) PTransformTranslation(org.apache.beam.runners.core.construction.PTransformTranslation) Collection(java.util.Collection) DeduplicationResult(org.apache.beam.runners.core.construction.graph.OutputDeduplicator.DeduplicationResult) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) Collectors(java.util.stream.Collectors) List(java.util.List) Matchers.containsInAnyOrder(org.hamcrest.Matchers.containsInAnyOrder) Matchers.equalTo(org.hamcrest.Matchers.equalTo) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Iterables.getOnlyElement(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables.getOnlyElement) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) DEFAULT_WIRE_CODER_SETTINGS(org.apache.beam.runners.core.construction.graph.ExecutableStage.DEFAULT_WIRE_CODER_SETTINGS) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) 
RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DeduplicationResult(org.apache.beam.runners.core.construction.graph.OutputDeduplicator.DeduplicationResult) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) Map(java.util.Map) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 4 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class OutputDeduplicatorTest method duplicateOverStagesAndTransforms.

@Test
public void duplicateOverStagesAndTransforms() {
    /* A PCollection produced by both a stage and a runner-executed transform should have every
     * producer replaced with a synthetic flatten.
     * original graph:
     *             --> one -> .out \
     * red -> .out |                -> shared -> .out
     *             --------------> /
     *
     * fused graph:
     *             --> [one -> .out -> shared ->] .out
     * red -> .out |
     *             ------------------> shared --> .out
     *
     * deduplicated graph:
     *             --> [one -> .out -> shared ->] .out:0 \
     * red -> .out |                                      -> shared -> .out
     *             -----------------> shared:0 -> .out:1 /
     */
    FunctionSpec parDoSpec = FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).build();

    PCollection redOut = PCollection.newBuilder().setUniqueName("red.out").build();
    PCollection oneOut = PCollection.newBuilder().setUniqueName("one.out").build();
    PCollection sharedOut = PCollection.newBuilder().setUniqueName("shared.out").build();
    PCollection blueOut = PCollection.newBuilder().setUniqueName("blue.out").build();
    String redOutId = redOut.getUniqueName();
    String oneOutId = oneOut.getUniqueName();
    String sharedOutId = sharedOut.getUniqueName();
    String blueOutId = blueOut.getUniqueName();

    PTransform red = PTransform.newBuilder()
        .setSpec(parDoSpec)
        .putOutputs("out", redOutId)
        .build();
    PTransform one = PTransform.newBuilder()
        .setSpec(parDoSpec)
        .putInputs("in", redOutId)
        .putOutputs("out", oneOutId)
        .build();
    PTransform shared = PTransform.newBuilder()
        .setSpec(parDoSpec)
        .putInputs("one", oneOutId)
        .putInputs("red", redOutId)
        .putOutputs("shared", sharedOutId)
        .build();
    PTransform blue = PTransform.newBuilder()
        .setSpec(parDoSpec)
        .putInputs("in", sharedOutId)
        .putOutputs("out", blueOutId)
        .build();

    RunnerApi.Components components = Components.newBuilder()
        .putTransforms("one", one)
        .putPcollections(oneOutId, oneOut)
        .putTransforms("red", red)
        .putPcollections(redOutId, redOut)
        .putTransforms("shared", shared)
        .putPcollections(sharedOutId, sharedOut)
        .putTransforms("blue", blue)
        .putPcollections(blueOutId, blueOut)
        .build();

    // "shared" is both fused into the stage and passed as a runner-executed transform.
    PTransformNode sharedTransform = PipelineNode.pTransform("shared", shared);
    ExecutableStage oneStage = ImmutableExecutableStage.of(
        components,
        Environment.getDefaultInstance(),
        PipelineNode.pCollection(redOutId, redOut),
        ImmutableList.of(),
        ImmutableList.of(),
        ImmutableList.of(),
        ImmutableList.of(PipelineNode.pTransform("one", one), sharedTransform),
        ImmutableList.of(PipelineNode.pCollection(sharedOutId, sharedOut)),
        DEFAULT_WIRE_CODER_SETTINGS);
    PTransformNode redTransform = PipelineNode.pTransform("red", red);
    PTransformNode blueTransform = PipelineNode.pTransform("blue", blue);
    QueryablePipeline queryable = QueryablePipeline.forPrimitivesIn(components);

    DeduplicationResult result = OutputDeduplicator.ensureSingleProducer(
        queryable,
        ImmutableList.of(oneStage),
        ImmutableList.of(redTransform, blueTransform, sharedTransform));

    // Exactly one transform is introduced, and its single output is the original shared.out.
    assertThat(result.getIntroducedTransforms(), hasSize(1));
    PTransformNode introduced = getOnlyElement(result.getIntroducedTransforms());
    PTransform introducedTransform = introduced.getTransform();
    assertThat(introducedTransform.getOutputsMap().size(), equalTo(1));
    assertThat(getOnlyElement(introducedTransform.getOutputsMap().values()), equalTo(sharedOutId));
    assertThat(
        result.getDeduplicatedComponents().getPcollectionsMap().keySet(),
        hasItems(introducedTransform.getInputsMap().values().toArray(new String[0])));

    assertThat(result.getDeduplicatedStages().keySet(), hasSize(1));
    assertThat(result.getDeduplicatedTransforms().keySet(), containsInAnyOrder("shared"));

    // Collect the outputs of the rewritten transform and of the rewritten stage.
    List<String> introducedOutputs = new ArrayList<>();
    introducedOutputs.addAll(
        result.getDeduplicatedTransforms().get("shared").getTransform().getOutputsMap().values());
    introducedOutputs.addAll(
        result.getDeduplicatedStages().get(oneStage).getOutputPCollections().stream()
            .map(PCollectionNode::getId)
            .collect(Collectors.toList()));

    assertThat(
        introducedTransform.getInputsMap().values(),
        containsInAnyOrder(introducedOutputs.toArray(new String[0])));
    assertThat(
        result.getDeduplicatedComponents().getPcollectionsMap().keySet(),
        hasItems(introducedOutputs.toArray(new String[0])));
    assertThat(
        result.getDeduplicatedComponents().getTransformsMap(),
        hasEntry(introduced.getId(), introduced.getTransform()));
}
Also used : PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DeduplicationResult(org.apache.beam.runners.core.construction.graph.OutputDeduplicator.DeduplicationResult) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) ArrayList(java.util.ArrayList) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 5 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class QueryablePipelineTest method transformWithSameSideAndMainInput.

/**
 * Verifies that a PCollection consumed by one transform as both its main input and a side input
 * is reported by {@link QueryablePipeline#getPerElementConsumers(PCollectionNode)} as well as by
 * {@link QueryablePipeline#getSideInputs(PTransformNode)}.
 */
@Test
public void transformWithSameSideAndMainInput() {
    PTransform root = PTransform.newBuilder()
        .setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN).build())
        .putOutputs("out", "read_pc")
        .build();

    // The ParDo payload declares "side_in" as a side input; "main_in" reads the same PCollection.
    FunctionSpec parDoWithSideInput = FunctionSpec.newBuilder()
        .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
        .setPayload(ParDoPayload.newBuilder().putSideInputs("side_in", SideInput.getDefaultInstance()).build().toByteString())
        .build();
    PTransform multiConsumer = PTransform.newBuilder()
        .putInputs("main_in", "read_pc")
        .putInputs("side_in", "read_pc")
        .putOutputs("out", "pardo_out")
        .setSpec(parDoWithSideInput)
        .build();

    Components components = Components.newBuilder()
        .putPcollections("read_pc", RunnerApi.PCollection.getDefaultInstance())
        .putPcollections("pardo_out", RunnerApi.PCollection.getDefaultInstance())
        .putTransforms("root", root)
        .putTransforms("multiConsumer", multiConsumer)
        .build();

    QueryablePipeline queryable = QueryablePipeline.forPrimitivesIn(components);
    PCollectionNode sharedInput = PipelineNode.pCollection("read_pc", components.getPcollectionsOrThrow("read_pc"));
    PTransformNode consumerNode = PipelineNode.pTransform("multiConsumer", components.getTransformsOrThrow("multiConsumer"));
    SideInputReference expectedSideInput = SideInputReference.of(consumerNode, "side_in", sharedInput);

    assertThat(queryable.getPerElementConsumers(sharedInput), contains(consumerNode));
    assertThat(queryable.getSideInputs(consumerNode), contains(expectedSideInput));
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) Test(org.junit.Test)

Aggregations

Test (org.junit.Test)55 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)49 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)40 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)31 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)20 Map (java.util.Map)16 WindowedValue (org.apache.beam.sdk.util.WindowedValue)16 IOException (java.io.IOException)15 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)15 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)14 Coder (org.apache.beam.sdk.coders.Coder)14 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)13 Pipeline (org.apache.beam.sdk.Pipeline)13 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)12 FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec)11 KvCoder (org.apache.beam.sdk.coders.KvCoder)11 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)11 ArrayList (java.util.ArrayList)10 List (java.util.List)10 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)10