Search in sources :

Example 6 with PTransformNode

use of org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode in project beam by apache.

the class GreedyStageFuserTest method sideInputIncludedInStage.

@Test
public void sideInputIncludedInStage() {
    // Pipeline under test:
    //   (impulse.out) -> read -> read.out -> parDo -> parDo.out
    //   (impulse.out) -> side_read -> side_read.out, consumed by parDo as "side_input"
    // The stage is rooted at read.out and seeded with parDo; it must report the side input
    // and expose no output PCollections.
    Environment commonEnv = Environments.createDockerEnvironment("common");

    FunctionSpec readSpec =
        FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
            .setPayload(
                ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())
            .build();
    PTransform readTransform =
        PTransform.newBuilder()
            .setUniqueName("read")
            .putInputs("input", "impulse.out")
            .putOutputs("output", "read.out")
            .setSpec(readSpec)
            .setEnvironmentId("common")
            .build();

    FunctionSpec parDoSpec =
        FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
            .setPayload(
                ParDoPayload.newBuilder()
                    .setDoFn(FunctionSpec.newBuilder())
                    .putSideInputs("side_input", SideInput.getDefaultInstance())
                    .build()
                    .toByteString())
            .build();
    PTransform parDoTransform =
        PTransform.newBuilder()
            .setUniqueName("parDo")
            .putInputs("input", "read.out")
            .putInputs("side_input", "side_read.out")
            .putOutputs("output", "parDo.out")
            .setSpec(parDoSpec)
            .setEnvironmentId("common")
            .build();

    PTransform sideReadTransform =
        PTransform.newBuilder()
            .setUniqueName("side_read")
            .setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN))
            .putInputs("input", "impulse.out")
            .putOutputs("output", "side_read.out")
            .build();
    PCollection sideInputPCollection =
        PCollection.newBuilder().setUniqueName("side_read.out").build();

    QueryablePipeline p =
        QueryablePipeline.forPrimitivesIn(
            partialComponents
                .toBuilder()
                .putTransforms("read", readTransform)
                .putPcollections(
                    "read.out", PCollection.newBuilder().setUniqueName("read.out").build())
                .putTransforms("side_read", sideReadTransform)
                .putPcollections("side_read.out", sideInputPCollection)
                .putTransforms("parDo", parDoTransform)
                .putPcollections(
                    "parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build())
                .putEnvironments("common", commonEnv)
                .build());

    PTransformNode readNode = PipelineNode.pTransform("read", readTransform);
    PTransformNode parDoNode = PipelineNode.pTransform("parDo", parDoTransform);
    PCollectionNode readOutput = getOnlyElement(p.getOutputPCollections(readNode));

    ExecutableStage subgraph =
        GreedyStageFuser.forGrpcPortRead(p, readOutput, ImmutableSet.of(parDoNode));

    // The side input must be attributed to parDo under its local name "side_input".
    SideInputReference expectedSideInput =
        SideInputReference.of(
            parDoNode,
            "side_input",
            PipelineNode.pCollection("side_read.out", sideInputPCollection));
    assertThat(subgraph.getSideInputs(), contains(expectedSideInput));
    assertThat(subgraph.getOutputPCollections(), emptyIterable());
}
Also used : PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 7 with PTransformNode

use of org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode in project beam by apache.

the class GreedyStageFuserTest method materializesWithSideInputConsumer.

@Test
public void materializesWithSideInputConsumer() {
    // Intended pipeline shape:
    //   (impulse.out) -> read -> read.out -----------> parDo -> parDo.out -> window -> window.out
    //   (impulse.out) -> side_read -> side_read.out /
    // parDo consumes side_read.out as a side input. Expected fusion:
    //   (impulse.out) -> read -> (read.out)
    //   (impulse.out) -> side_read -> (side_read.out)
    //   (read.out) -> parDo -> parDo.out -> window -> window.out
    // parDo has no per-element consumer relationship with side_read.out, so it cannot root a
    // stage that reads that materialized collection. A node with side inputs must root its own
    // stage, but that does not restrict fusion of its consumers.
    //
    // NOTE(review): the diagram shows window consuming parDo.out, but the fixture below wires
    // window's input to "read.out" - confirm which is intended.
    Environment commonEnv = Environments.createDockerEnvironment("common");

    PTransform readTransform =
        PTransform.newBuilder()
            .putInputs("input", "impulse.out")
            .putOutputs("output", "read.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();

    PTransform sideReadTransform =
        PTransform.newBuilder()
            .setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN))
            .putInputs("input", "impulse.out")
            .putOutputs("output", "side_read.out")
            .build();

    PTransform parDoTransform =
        PTransform.newBuilder()
            .putInputs("input", "read.out")
            .putInputs("side_input", "side_read.out")
            .putOutputs("output", "parDo.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .putSideInputs("side_input", SideInput.getDefaultInstance())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();

    PTransform windowTransform =
        PTransform.newBuilder()
            .putInputs("input", "read.out")
            .putOutputs("output", "window.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
                    .setPayload(
                        WindowIntoPayload.newBuilder()
                            .setWindowFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();

    QueryablePipeline p =
        QueryablePipeline.forPrimitivesIn(
            partialComponents
                .toBuilder()
                .putTransforms("read", readTransform)
                .putPcollections(
                    "read.out", PCollection.newBuilder().setUniqueName("read.out").build())
                .putTransforms("side_read", sideReadTransform)
                .putPcollections(
                    "side_read.out",
                    PCollection.newBuilder().setUniqueName("side_read.out").build())
                .putTransforms("parDo", parDoTransform)
                .putPcollections(
                    "parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build())
                .putTransforms("window", windowTransform)
                .putPcollections(
                    "window.out", PCollection.newBuilder().setUniqueName("window.out").build())
                .putEnvironments("common", commonEnv)
                .build());

    PTransformNode readNode = PipelineNode.pTransform("read", readTransform);
    PCollectionNode readOutput = getOnlyElement(p.getOutputPCollections(readNode));

    // Root the stage at the impulse output, seeded with the read transform only.
    ExecutableStage subgraph =
        GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, ImmutableSet.of(readNode));

    // read.out must be materialized, and the stage must contain exactly the read transform.
    assertThat(subgraph.getOutputPCollections(), contains(readOutput));
    assertThat(subgraph, hasSubtransforms(readNode.getId()));
}
Also used : PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 8 with PTransformNode

use of org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode in project beam by apache.

the class QueryablePipelineTest method forTransformsWithSubgraph.

@Test
public void forTransformsWithSubgraph() {
    // root -> output.out -> { consumer, ignored }
    // Only "root" and "consumer" are handed to forTransforms, so "ignored" must not show up
    // among the per-element consumers of output.out.
    PTransform rootTransform = PTransform.newBuilder().putOutputs("output", "output.out").build();
    PTransform consumerTransform =
        PTransform.newBuilder().putInputs("input", "output.out").build();
    PTransform ignoredTransform =
        PTransform.newBuilder().putInputs("input", "output.out").build();

    Components components =
        Components.newBuilder()
            .putTransforms("root", rootTransform)
            .putPcollections(
                "output.out",
                RunnerApi.PCollection.newBuilder().setUniqueName("output.out").build())
            .putTransforms("consumer", consumerTransform)
            .putTransforms("ignored", ignoredTransform)
            .build();

    QueryablePipeline pipeline =
        QueryablePipeline.forTransforms(ImmutableSet.of("root", "consumer"), components);

    PTransformNode expectedRoot =
        PipelineNode.pTransform("root", components.getTransformsOrThrow("root"));
    assertThat(pipeline.getRootTransforms(), contains(expectedRoot));

    Set<PTransformNode> consumers =
        pipeline.getPerElementConsumers(
            PipelineNode.pCollection(
                "output.out", components.getPcollectionsOrThrow("output.out")));
    PTransformNode expectedConsumer =
        PipelineNode.pTransform("consumer", components.getTransformsOrThrow("consumer"));
    assertThat(consumers, contains(expectedConsumer));
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Test(org.junit.Test)

Example 9 with PTransformNode

use of org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode in project beam by apache.

the class GreedyStageFuserTest method materializesWithGroupByKeyConsumer.

@Test
public void materializesWithGroupByKeyConsumer() {
    // (impulse.out) -> read -> read.out -> gbk -> gbk.out
    // Fuses to
    // (impulse.out) -> read -> (read.out)
    // GBK is the responsibility of the runner, so it is not included in a stage.
    Environment env = Environments.createDockerEnvironment("common");

    PTransform readTransform =
        PTransform.newBuilder()
            .putInputs("input", "impulse.out")
            .putOutputs("output", "read.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();

    // The GBK transform carries no environment id in this fixture.
    PTransform gbkTransform =
        PTransform.newBuilder()
            .putInputs("input", "read.out")
            .putOutputs("output", "gbk.out")
            .setSpec(
                FunctionSpec.newBuilder().setUrn(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN))
            .build();

    QueryablePipeline p =
        QueryablePipeline.forPrimitivesIn(
            partialComponents
                .toBuilder()
                .putTransforms("read", readTransform)
                .putPcollections(
                    "read.out", PCollection.newBuilder().setUniqueName("read.out").build())
                .putTransforms("gbk", gbkTransform)
                // Unique name matches the id "gbk.out"; it was previously "parDo.out", a
                // copy-paste leftover from a sibling fixture.
                .putPcollections(
                    "gbk.out", PCollection.newBuilder().setUniqueName("gbk.out").build())
                .putEnvironments("common", env)
                .build());

    PTransformNode readNode = PipelineNode.pTransform("read", readTransform);
    PCollectionNode readOutput = getOnlyElement(p.getOutputPCollections(readNode));

    // Root the stage at the impulse output, seeded with the read transform only.
    ExecutableStage subgraph =
        GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, ImmutableSet.of(readNode));

    // read.out is materialized for the GBK, and only read is part of the stage.
    assertThat(subgraph.getOutputPCollections(), contains(readOutput));
    assertThat(subgraph, hasSubtransforms(readNode.getId()));
}
Also used : PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 10 with PTransformNode

use of org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode in project beam by apache.

the class ExecutableStage method fromPayload.

/**
 * Rebuilds an {@link ExecutableStage} from its {@link ExecutableStagePayload} representation.
 *
 * <p>See {@link #toPTransform} for how the payload is constructed.
 *
 * <p>Note: The payload contains some information redundant with the {@link PTransform} it is the
 * payload of. The {@link ExecutableStagePayload} should be sufficiently rich to construct a
 * {@code ProcessBundleDescriptor} using only the payload.
 *
 * @param payload the serialized stage description; every id it references is resolved against
 *     the payload's own components
 * @return a stage holding the payload's components, environment, input, side inputs, user
 *     states, timers, transforms, outputs and wire coder settings
 */
static ExecutableStage fromPayload(ExecutableStagePayload payload) {
    Components stageComponents = payload.getComponents();
    Environment stageEnvironment = payload.getEnvironment();
    Collection<WireCoderSetting> coderSettings = payload.getWireCoderSettingsList();

    // The input PCollection is looked up by id in the payload's components.
    String inputId = payload.getInput();
    PCollectionNode inputNode =
        PipelineNode.pCollection(inputId, stageComponents.getPcollectionsOrThrow(inputId));

    List<SideInputReference> sideInputRefs =
        payload.getSideInputsList().stream()
            .map(ref -> SideInputReference.fromSideInputId(ref, stageComponents))
            .collect(Collectors.toList());

    List<UserStateReference> userStateRefs =
        payload.getUserStatesList().stream()
            .map(ref -> UserStateReference.fromUserStateId(ref, stageComponents))
            .collect(Collectors.toList());

    List<TimerReference> timerRefs =
        payload.getTimersList().stream()
            .map(ref -> TimerReference.fromTimerId(ref, stageComponents))
            .collect(Collectors.toList());

    List<PTransformNode> transformNodes =
        payload.getTransformsList().stream()
            .map(
                transformId ->
                    PipelineNode.pTransform(
                        transformId, stageComponents.getTransformsOrThrow(transformId)))
            .collect(Collectors.toList());

    List<PCollectionNode> outputNodes =
        payload.getOutputsList().stream()
            .map(
                pcollectionId ->
                    PipelineNode.pCollection(
                        pcollectionId, stageComponents.getPcollectionsOrThrow(pcollectionId)))
            .collect(Collectors.toList());

    return ImmutableExecutableStage.of(
        stageComponents,
        stageEnvironment,
        inputNode,
        sideInputRefs,
        userStateRefs,
        timerRefs,
        transformNodes,
        outputNodes,
        coderSettings);
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Collection(java.util.Collection) WireCoderSetting(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.WireCoderSetting) Collectors(java.util.stream.Collectors) UserStateId(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.UserStateId) ExecutableStagePayload(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload) List(java.util.List) Pipeline(org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline) FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) TimerId(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.TimerId) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) SideInputId(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.SideInputId) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) Collections(java.util.Collections) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) WireCoderSetting(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.WireCoderSetting) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment)

Aggregations

PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)33 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)22 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)20 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)20 Test (org.junit.Test)20 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)15 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)12 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)9 Collection (java.util.Collection)8 Map (java.util.Map)8 LinkedHashSet (java.util.LinkedHashSet)7 ArrayList (java.util.ArrayList)6 HashSet (java.util.HashSet)6 Collectors (java.util.stream.Collectors)6 DeduplicationResult (org.apache.beam.runners.core.construction.graph.OutputDeduplicator.DeduplicationResult)6 Pipeline (org.apache.beam.sdk.Pipeline)6 PTransformTranslation (org.apache.beam.runners.core.construction.PTransformTranslation)5 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)5 HashMap (java.util.HashMap)4 TreeSet (java.util.TreeSet)4