Search in sources :

Example 11 with PTransform

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.PTransform in project beam by apache.

the class GreedyPipelineFuserTest method statefulParDoRootsStage.

/*
   * impulse -> .out -> parDo -> .out -> stateful -> .out
   * becomes
   * (impulse.out) -> parDo -> (parDo.out)
   * (parDo.out) -> stateful
   */
@Test
public void statefulParDoRootsStage() {
    // (impulse.out) -> parDo -> (parDo.out)
    // (parDo.out) -> stateful -> stateful.out
    // stateful has a state spec which prevents it from fusing with an upstream ParDo
    PTransform parDoTransform = PTransform.newBuilder().setUniqueName("ParDo").putInputs("input", "impulse.out").putOutputs("output", "parDo.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build();
    PTransform statefulTransform = PTransform.newBuilder().setUniqueName("StatefulParDo").putInputs("input", "parDo.out").putOutputs("output", "stateful.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).putStateSpecs("state", StateSpec.getDefaultInstance()).build().toByteString())).setEnvironmentId("common").build();
    Components components = partialComponents.toBuilder().putTransforms("parDo", parDoTransform).putPcollections("parDo.out", pc("parDo.out")).putTransforms("stateful", statefulTransform).putPcollections("stateful.out", pc("stateful.out")).putEnvironments("common", Environments.createDockerEnvironment("common")).build();
    FusedPipeline fused = GreedyPipelineFuser.fuse(Pipeline.newBuilder().setComponents(components).addRequirements(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN).build());
    assertThat(fused.getRunnerExecutedTransforms(), containsInAnyOrder(PipelineNode.pTransform("impulse", components.getTransformsOrThrow("impulse"))));
    assertThat(fused.getFusedStages(), containsInAnyOrder(ExecutableStageMatcher.withInput("impulse.out").withOutputs("parDo.out").withTransforms("parDo"), ExecutableStageMatcher.withInput("parDo.out").withNoOutputs().withTransforms("stateful")));
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 12 with PTransform

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.PTransform in project beam by apache.

the class GreedyStageFuserTest method fusesCompatibleEnvironments.

@Test
public void fusesCompatibleEnvironments() {
    // (impulse.out) -> parDo -> parDo.out -> window -> window.out
    // parDo and window both have the environment "common" and can be fused together
    PTransform parDoTransform = PTransform.newBuilder().putInputs("input", "impulse.out").putOutputs("output", "parDo.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build();
    PTransform windowTransform = PTransform.newBuilder().putInputs("input", "impulse.out").putOutputs("output", "window.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN).setPayload(WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build();
    QueryablePipeline p = QueryablePipeline.forPrimitivesIn(partialComponents.toBuilder().putTransforms("parDo", parDoTransform).putPcollections("parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build()).putTransforms("window", windowTransform).putPcollections("window.out", PCollection.newBuilder().setUniqueName("window.out").build()).putEnvironments("common", Environments.createDockerEnvironment("common")).build());
    ExecutableStage subgraph = GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, ImmutableSet.of(PipelineNode.pTransform("parDo", parDoTransform), PipelineNode.pTransform("window", windowTransform)));
    // Nothing consumes the outputs of ParDo or Window, so they don't have to be materialized
    assertThat(subgraph.getOutputPCollections(), emptyIterable());
    assertThat(subgraph, hasSubtransforms("parDo", "window"));
}
Also used : PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 13 with PTransform

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.PTransform in project beam by apache.

the class GreedyStageFuserTest method materializesWithGroupByKeyConsumer.

@Test
public void materializesWithGroupByKeyConsumer() {
    // (impulse.out) -> read -> read.out -> gbk -> gbk.out
    // Fuses to
    // (impulse.out) -> read -> (read.out)
    // GBK is the responsibility of the runner, so it is not included in a stage.
    Environment env = Environments.createDockerEnvironment("common");
    PTransform readTransform = PTransform.newBuilder().putInputs("input", "impulse.out").putOutputs("output", "read.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build();
    QueryablePipeline p = QueryablePipeline.forPrimitivesIn(partialComponents.toBuilder().putTransforms("read", readTransform).putPcollections("read.out", PCollection.newBuilder().setUniqueName("read.out").build()).putTransforms("gbk", PTransform.newBuilder().putInputs("input", "read.out").putOutputs("output", "gbk.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN)).build()).putPcollections("gbk.out", PCollection.newBuilder().setUniqueName("parDo.out").build()).putEnvironments("common", env).build());
    PTransformNode readNode = PipelineNode.pTransform("read", readTransform);
    PCollectionNode readOutput = getOnlyElement(p.getOutputPCollections(readNode));
    ExecutableStage subgraph = GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, ImmutableSet.of(readNode));
    assertThat(subgraph.getOutputPCollections(), contains(readOutput));
    assertThat(subgraph, hasSubtransforms(readNode.getId()));
}
Also used : PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 14 with PTransform

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.PTransform in project beam by apache.

the class GreedyStageFuserTest method noEnvironmentThrows.

@Test
public void noEnvironmentThrows() {
    // (impulse.out) -> runnerTransform -> gbk.out
    // runnerTransform can't be executed in an environment, so trying to construct it should fail
    PTransform gbkTransform = PTransform.newBuilder().putInputs("input", "impulse.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN)).putOutputs("output", "gbk.out").build();
    QueryablePipeline p = QueryablePipeline.forPrimitivesIn(partialComponents.toBuilder().putTransforms("runnerTransform", gbkTransform).putPcollections("gbk.out", PCollection.newBuilder().setUniqueName("gbk.out").build()).build());
    thrown.expect(IllegalArgumentException.class);
    thrown.expectMessage("Environment must be populated");
    GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, ImmutableSet.of(PipelineNode.pTransform("runnerTransform", gbkTransform)));
}
Also used : PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 15 with PTransform

use of org.apache.beam.sdk.common.runner.v1.RunnerApi.PTransform in project beam by apache.

the class GreedyStageFuserTest method fusesFlatten.

@Test
public void fusesFlatten() {
    // (impulse.out) -> parDo -> parDo.out --> flatten -> flatten.out -> window -> window.out
    // \                     /
    // -> read -> read.out -
    // The flatten can be executed within the same environment as any transform; the window can
    // execute in the same environment as the rest of the transforms, and can fuse with the stage
    PTransform readTransform = PTransform.newBuilder().putInputs("input", "impulse.out").putOutputs("output", "read.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build();
    PTransform parDoTransform = PTransform.newBuilder().putInputs("input", "impulse.out").putOutputs("output", "parDo.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build();
    PTransform flattenTransform = PTransform.newBuilder().putInputs("readInput", "read.out").putInputs("parDoInput", "parDo.out").putOutputs("output", "flatten.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN)).build();
    PTransform windowTransform = PTransform.newBuilder().putInputs("input", "flatten.out").putOutputs("output", "window.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN).setPayload(WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build();
    QueryablePipeline p = QueryablePipeline.forPrimitivesIn(partialComponents.toBuilder().putTransforms("read", readTransform).putPcollections("read.out", PCollection.newBuilder().setUniqueName("read.out").build()).putTransforms("parDo", parDoTransform).putPcollections("parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build()).putTransforms("flatten", flattenTransform).putPcollections("flatten.out", PCollection.newBuilder().setUniqueName("flatten.out").build()).putTransforms("window", windowTransform).putPcollections("window.out", PCollection.newBuilder().setUniqueName("window.out").build()).putEnvironments("common", Environments.createDockerEnvironment("common")).build());
    ExecutableStage subgraph = GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, p.getPerElementConsumers(impulseOutputNode));
    assertThat(subgraph.getOutputPCollections(), emptyIterable());
    assertThat(subgraph, hasSubtransforms("read", "parDo", "flatten", "window"));
}
Also used : PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Aggregations

PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)58 Test (org.junit.Test)34 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)22 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)20 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)19 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)18 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)16 Map (java.util.Map)14 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)10 Collection (java.util.Collection)7 Pipeline (org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline)7 ArrayList (java.util.ArrayList)6 HashSet (java.util.HashSet)6 Collectors (java.util.stream.Collectors)6 DeduplicationResult (org.apache.beam.runners.core.construction.graph.OutputDeduplicator.DeduplicationResult)6 LinkedHashSet (java.util.LinkedHashSet)5 RemoteGrpcPort (org.apache.beam.model.fnexecution.v1.BeamFnApi.RemoteGrpcPort)5 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)5 ExecutableStagePayload (org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload)4 FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec)4