Search in sources :

Example 11 with Components

Use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class GreedyPipelineFuserTest method parDoWithStateAndTimerRootsStage.

/*
   * A ParDo that declares both a state spec and a timer family spec roots its own stage.
   *
   * impulse -> .out -> timer -> .out
   * becomes
   * (impulse.out) -> timer
   *
   * Expected result: the impulse is the only runner-executed transform, and the single
   * fused stage reads impulse.out, contains only "timer", and materializes no outputs.
   */
@Test
public void parDoWithStateAndTimerRootsStage() {
    // ParDo payload carrying one state spec and one timer family spec.
    ParDoPayload statefulTimerPayload = ParDoPayload.newBuilder()
            .setDoFn(FunctionSpec.newBuilder())
            .putStateSpecs("state", StateSpec.getDefaultInstance())
            .putTimerFamilySpecs("timer", TimerFamilySpec.getDefaultInstance())
            .build();
    FunctionSpec timerParDoSpec = FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
            .setPayload(statefulTimerPayload.toByteString())
            .build();

    // timer.out is wired as both an input and an output of the transform.
    PTransform timerTransform = PTransform.newBuilder()
            .setUniqueName("TimerParDo")
            .putInputs("input", "impulse.out")
            .putInputs("timer", "timer.out")
            .putOutputs("timer", "timer.out")
            .putOutputs("output", "output.out")
            .setSpec(timerParDoSpec)
            .setEnvironmentId("common")
            .build();

    Components components = partialComponents.toBuilder()
            .putTransforms("timer", timerTransform)
            .putPcollections("timer.out", pc("timer.out"))
            .putPcollections("output.out", pc("output.out"))
            .putEnvironments("common", Environments.createDockerEnvironment("common"))
            .build();
    Pipeline pipeline = Pipeline.newBuilder()
            .setComponents(components)
            .addRequirements(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN)
            .build();

    FusedPipeline fused = GreedyPipelineFuser.fuse(pipeline);

    // Only the impulse is left for the runner to execute.
    assertThat(
            fused.getRunnerExecutedTransforms(),
            containsInAnyOrder(
                    PipelineNode.pTransform("impulse", components.getTransformsOrThrow("impulse"))));
    // Exactly one fused stage, rooted at impulse.out.
    assertThat(
            fused.getFusedStages(),
            contains(
                    ExecutableStageMatcher.withInput("impulse.out")
                            .withNoOutputs()
                            .withTransforms("timer")));
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 12 with Components

Use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class GreedyPipelineFuserTest method statefulParDoRootsStage.

/*
   * A ParDo with a state spec does not fuse into the stage of its upstream ParDo.
   *
   * impulse -> .out -> parDo -> .out -> stateful -> .out
   * becomes
   * (impulse.out) -> parDo -> (parDo.out)
   * (parDo.out) -> stateful
   */
@Test
public void statefulParDoRootsStage() {
    // Upstream ParDo: no state, no timers.
    ParDoPayload plainPayload = ParDoPayload.newBuilder()
            .setDoFn(FunctionSpec.newBuilder())
            .build();
    PTransform parDoTransform = PTransform.newBuilder()
            .setUniqueName("ParDo")
            .putInputs("input", "impulse.out")
            .putOutputs("output", "parDo.out")
            .setSpec(
                    FunctionSpec.newBuilder()
                            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                            .setPayload(plainPayload.toByteString()))
            .setEnvironmentId("common")
            .build();

    // Downstream ParDo: same environment, but its payload declares a state spec.
    ParDoPayload statefulPayload = ParDoPayload.newBuilder()
            .setDoFn(FunctionSpec.newBuilder())
            .putStateSpecs("state", StateSpec.getDefaultInstance())
            .build();
    PTransform statefulTransform = PTransform.newBuilder()
            .setUniqueName("StatefulParDo")
            .putInputs("input", "parDo.out")
            .putOutputs("output", "stateful.out")
            .setSpec(
                    FunctionSpec.newBuilder()
                            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                            .setPayload(statefulPayload.toByteString()))
            .setEnvironmentId("common")
            .build();

    Components components = partialComponents.toBuilder()
            .putTransforms("parDo", parDoTransform)
            .putPcollections("parDo.out", pc("parDo.out"))
            .putTransforms("stateful", statefulTransform)
            .putPcollections("stateful.out", pc("stateful.out"))
            .putEnvironments("common", Environments.createDockerEnvironment("common"))
            .build();
    Pipeline pipeline = Pipeline.newBuilder()
            .setComponents(components)
            .addRequirements(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN)
            .build();

    FusedPipeline fused = GreedyPipelineFuser.fuse(pipeline);

    // Only the impulse is left for the runner to execute.
    assertThat(
            fused.getRunnerExecutedTransforms(),
            containsInAnyOrder(
                    PipelineNode.pTransform("impulse", components.getTransformsOrThrow("impulse"))));
    // Two stages: parDo materializes parDo.out, and stateful is rooted at parDo.out.
    assertThat(
            fused.getFusedStages(),
            containsInAnyOrder(
                    ExecutableStageMatcher.withInput("impulse.out")
                            .withOutputs("parDo.out")
                            .withTransforms("parDo"),
                    ExecutableStageMatcher.withInput("parDo.out")
                            .withNoOutputs()
                            .withTransforms("stateful")));
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 13 with Components

Use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class GreedyPipelineFuserTest method sideInputRootsNewStage.

/*
   * A ParDo that consumes a side input is placed in its own stage instead of being fused
   * with its siblings on the same main input.
   *
   * mainImpulse -> .out -> read -> .out -> leftParDo -> .out
   *                                     \ -> rightParDo -> .out
   *                                      ------> sideParDo -> .out
   *                                           /
   * sideImpulse -> .out -> sideRead -> .out /
   *
   * becomes
   * (mainImpulse.out) -> read -> (read.out)
   * (read.out) -> leftParDo
   *            \
   *             -> rightParDo
   * (read.out) -> sideParDo          [side input "side" of sideParDo]
   * (sideImpulse.out) -> sideRead -> (sideRead.out)
   */
@Test
public void sideInputRootsNewStage() {
    // Specs shared by several transforms below; proto messages are immutable, so reuse is safe.
    FunctionSpec impulseSpec = FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN)
            .build();
    FunctionSpec plainParDoSpec = FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
            .setPayload(
                    ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString())
            .build();
    // Same as the plain ParDo, plus a declared side input named "side".
    FunctionSpec sideInputParDoSpec = FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
            .setPayload(
                    ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .putSideInputs("side", SideInput.getDefaultInstance())
                            .build()
                            .toByteString())
            .build();

    // Main branch: mainImpulse -> read.
    PTransform mainImpulse = PTransform.newBuilder()
            .setUniqueName("MainImpulse")
            .putOutputs("output", "mainImpulse.out")
            .setSpec(impulseSpec)
            .build();
    PTransform read = PTransform.newBuilder()
            .setUniqueName("Read")
            .putInputs("input", "mainImpulse.out")
            .putOutputs("output", "read.out")
            .setSpec(plainParDoSpec)
            .setEnvironmentId("py")
            .build();

    // Side branch: sideImpulse -> sideRead.
    PTransform sideImpulse = PTransform.newBuilder()
            .setUniqueName("SideImpulse")
            .putOutputs("output", "sideImpulse.out")
            .setSpec(impulseSpec)
            .build();
    PTransform sideRead = PTransform.newBuilder()
            .setUniqueName("SideRead")
            .putInputs("input", "sideImpulse.out")
            .putOutputs("output", "sideRead.out")
            .setSpec(plainParDoSpec)
            .setEnvironmentId("py")
            .build();

    // Three consumers of read.out; only sideParDo also reads sideRead.out as a side input.
    PTransform leftParDo = PTransform.newBuilder()
            .setUniqueName("LeftParDo")
            .putInputs("main", "read.out")
            .putOutputs("output", "leftParDo.out")
            .setSpec(plainParDoSpec)
            .setEnvironmentId("py")
            .build();
    PTransform rightParDo = PTransform.newBuilder()
            .setUniqueName("RightParDo")
            .putInputs("main", "read.out")
            .putOutputs("output", "rightParDo.out")
            .setSpec(plainParDoSpec)
            .setEnvironmentId("py")
            .build();
    PTransform sideParDo = PTransform.newBuilder()
            .setUniqueName("SideParDo")
            .putInputs("main", "read.out")
            .putInputs("side", "sideRead.out")
            .putOutputs("output", "sideParDo.out")
            .setSpec(sideInputParDoSpec)
            .setEnvironmentId("py")
            .build();

    Components components = Components.newBuilder()
            .putCoders("coder", Coder.newBuilder().build())
            .putCoders("windowCoder", Coder.newBuilder().build())
            .putWindowingStrategies(
                    "ws", WindowingStrategy.newBuilder().setWindowCoderId("windowCoder").build())
            .putTransforms("mainImpulse", mainImpulse)
            .putPcollections("mainImpulse.out", pc("mainImpulse.out"))
            .putTransforms("read", read)
            .putPcollections("read.out", pc("read.out"))
            .putTransforms("sideImpulse", sideImpulse)
            .putPcollections("sideImpulse.out", pc("sideImpulse.out"))
            .putTransforms("sideRead", sideRead)
            .putPcollections("sideRead.out", pc("sideRead.out"))
            .putTransforms("leftParDo", leftParDo)
            .putPcollections("leftParDo.out", pc("leftParDo.out"))
            .putTransforms("rightParDo", rightParDo)
            .putPcollections("rightParDo.out", pc("rightParDo.out"))
            .putTransforms("sideParDo", sideParDo)
            .putPcollections("sideParDo.out", pc("sideParDo.out"))
            .putEnvironments("py", Environments.createDockerEnvironment("py"))
            .build();
    Pipeline pipeline = Pipeline.newBuilder().setComponents(components).build();

    FusedPipeline fused = GreedyPipelineFuser.fuse(pipeline);

    // Both impulses are left for the runner to execute.
    assertThat(
            fused.getRunnerExecutedTransforms(),
            containsInAnyOrder(
                    PipelineNode.pTransform(
                            "mainImpulse", components.getTransformsOrThrow("mainImpulse")),
                    PipelineNode.pTransform(
                            "sideImpulse", components.getTransformsOrThrow("sideImpulse"))));
    // Four stages: read, the fused left/right siblings, sideParDo alone, and sideRead.
    assertThat(
            fused.getFusedStages(),
            containsInAnyOrder(
                    ExecutableStageMatcher.withInput("mainImpulse.out")
                            .withOutputs("read.out")
                            .withTransforms("read"),
                    ExecutableStageMatcher.withInput("read.out")
                            .withNoOutputs()
                            .withTransforms("leftParDo", "rightParDo"),
                    ExecutableStageMatcher.withInput("read.out")
                            .withSideInputs(
                                    RunnerApi.ExecutableStagePayload.SideInputId.newBuilder()
                                            .setTransformId("sideParDo")
                                            .setLocalName("side")
                                            .build())
                            .withNoOutputs()
                            .withTransforms("sideParDo"),
                    ExecutableStageMatcher.withInput("sideImpulse.out")
                            .withOutputs("sideRead.out")
                            .withTransforms("sideRead")));
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) Test(org.junit.Test)

Example 14 with Components

Use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class GreedyPipelineFuserTest method multipleEnvironmentsBecomesMultipleStages.

/*
   * Consumers of the same PCollection that run in different environments end up in
   * different stages.
   *
   * impulse -> .out -> read -> .out --> goTransform -> .out
   *                                  \
   *                                   -> pyTransform -> .out
   * becomes (impulse.out) -> read -> (read.out)
   *         (read.out) -> goTransform
   *         (read.out) -> pyTransform
   */
@Test
public void multipleEnvironmentsBecomesMultipleStages() {
    // Plain ParDo spec shared by "read" and "goTransform"; only the environment id differs.
    FunctionSpec parDoSpec = FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
            .setPayload(
                    ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString())
            .build();
    // "pyTransform" is an assign-windows transform rather than a ParDo.
    FunctionSpec assignWindowsSpec = FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
            .setPayload(
                    WindowIntoPayload.newBuilder()
                            .setWindowFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString())
            .build();

    PTransform read = PTransform.newBuilder()
            .setUniqueName("Read")
            .putInputs("input", "impulse.out")
            .putOutputs("output", "read.out")
            .setSpec(parDoSpec)
            .setEnvironmentId("py")
            .build();
    PTransform goTransform = PTransform.newBuilder()
            .setUniqueName("GoTransform")
            .putInputs("input", "read.out")
            .putOutputs("output", "go.out")
            .setSpec(parDoSpec)
            .setEnvironmentId("go")
            .build();
    PTransform pyTransform = PTransform.newBuilder()
            .setUniqueName("PyTransform")
            .putInputs("input", "read.out")
            .putOutputs("output", "py.out")
            .setSpec(assignWindowsSpec)
            .setEnvironmentId("py")
            .build();

    Components components = partialComponents.toBuilder()
            .putTransforms("read", read)
            .putPcollections("read.out", pc("read.out"))
            .putTransforms("goTransform", goTransform)
            .putPcollections("go.out", pc("go.out"))
            .putTransforms("pyTransform", pyTransform)
            .putPcollections("py.out", pc("py.out"))
            .build();
    Pipeline pipeline = Pipeline.newBuilder().setComponents(components).build();

    FusedPipeline fused = GreedyPipelineFuser.fuse(pipeline);

    // Impulse is the runner transform
    assertThat(fused.getRunnerExecutedTransforms(), hasSize(1));
    assertThat(fused.getFusedStages(), hasSize(3));
    // pyTransform and goTransform each get their own stage rooted at read.out.
    assertThat(
            fused.getFusedStages(),
            containsInAnyOrder(
                    ExecutableStageMatcher.withInput("impulse.out")
                            .withOutputs("read.out")
                            .withTransforms("read"),
                    ExecutableStageMatcher.withInput("read.out")
                            .withNoOutputs()
                            .withTransforms("pyTransform"),
                    ExecutableStageMatcher.withInput("read.out")
                            .withNoOutputs()
                            .withTransforms("goTransform")));
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) Test(org.junit.Test)

Example 15 with Components

Use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class GreedyPipelineFuserTest method flattenAfterNoEnvDoesNotFuse.

/*
   * impulse -> .out -> flatten -> .out -> read -> .out -> parDo -> .out
   * becomes
   * (flatten.out) -> read -> parDo
   *
   * The flatten here has no environment id. The test expects it to be executed by the runner
   * (alongside the impulse) rather than fused into the downstream "py" stage, so the single
   * fused stage is rooted at flatten.out.
   */
@Test
public void flattenAfterNoEnvDoesNotFuse() {
    // Flatten over the impulse output; note that no environment id is set on it.
    PTransform flatten = PTransform.newBuilder()
            .setUniqueName("Flatten")
            .putInputs("impulseInput", "impulse.out")
            .putOutputs("output", "flatten.out")
            .setSpec(
                    FunctionSpec.newBuilder()
                            .setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN)
                            .build())
            .build();

    // Two chained ParDos in the "py" environment share the same plain spec.
    FunctionSpec parDoSpec = FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
            .setPayload(
                    ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString())
            .build();
    PTransform read = PTransform.newBuilder()
            .setUniqueName("Read")
            .putInputs("input", "flatten.out")
            .putOutputs("output", "read.out")
            .setSpec(parDoSpec)
            .setEnvironmentId("py")
            .build();
    PTransform parDo = PTransform.newBuilder()
            .setUniqueName("ParDo")
            .putInputs("input", "read.out")
            .putOutputs("output", "parDo.out")
            .setSpec(parDoSpec)
            .setEnvironmentId("py")
            .build();

    Components components = partialComponents.toBuilder()
            .putTransforms("flatten", flatten)
            .putPcollections("flatten.out", pc("flatten.out"))
            .putTransforms("read", read)
            .putPcollections("read.out", pc("read.out"))
            .putTransforms("parDo", parDo)
            .putPcollections("parDo.out", pc("parDo.out"))
            .build();
    Pipeline pipeline = Pipeline.newBuilder().setComponents(components).build();

    FusedPipeline fused = GreedyPipelineFuser.fuse(pipeline);

    // Both the impulse and the flatten are left for the runner to execute.
    assertThat(
            fused.getRunnerExecutedTransforms(),
            containsInAnyOrder(
                    PipelineNode.pTransform("impulse", components.getTransformsOrThrow("impulse")),
                    PipelineNode.pTransform("flatten", components.getTransformsOrThrow("flatten"))));
    // read and parDo fuse into one stage that consumes flatten.out.
    assertThat(
            fused.getFusedStages(),
            contains(
                    ExecutableStageMatcher.withInput("flatten.out")
                            .withNoOutputs()
                            .withTransforms("read", "parDo")));
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) Test(org.junit.Test)

Aggregations

Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)49 Test (org.junit.Test)37 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)19 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)18 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)12 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)10 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)10 Map (java.util.Map)9 Pipeline (org.apache.beam.sdk.Pipeline)9 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)7 Collection (java.util.Collection)6 Collectors (java.util.stream.Collectors)6 List (java.util.List)5 ExecutableStagePayload (org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload)5 FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec)5 DeduplicationResult (org.apache.beam.runners.core.construction.graph.OutputDeduplicator.DeduplicationResult)5 ArrayList (java.util.ArrayList)4 Pipeline (org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline)4 PCollection (org.apache.beam.sdk.values.PCollection)4 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)4