use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class GreedyPipelineFuserTest method statefulParDoRootsStage.
/*
* impulse -> .out -> parDo -> .out -> stateful -> .out
* becomes
* (impulse.out) -> parDo -> (parDo.out)
* (parDo.out) -> stateful
*/
@Test
public void statefulParDoRootsStage() {
// (impulse.out) -> parDo -> (parDo.out)
// (parDo.out) -> stateful -> stateful.out
// stateful has a state spec which prevents it from fusing with an upstream ParDo
PTransform parDoTransform = PTransform.newBuilder().setUniqueName("ParDo").putInputs("input", "impulse.out").putOutputs("output", "parDo.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build();
PTransform statefulTransform = PTransform.newBuilder().setUniqueName("StatefulParDo").putInputs("input", "parDo.out").putOutputs("output", "stateful.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).putStateSpecs("state", StateSpec.getDefaultInstance()).build().toByteString())).setEnvironmentId("common").build();
Components components = partialComponents.toBuilder().putTransforms("parDo", parDoTransform).putPcollections("parDo.out", pc("parDo.out")).putTransforms("stateful", statefulTransform).putPcollections("stateful.out", pc("stateful.out")).putEnvironments("common", Environments.createDockerEnvironment("common")).build();
FusedPipeline fused = GreedyPipelineFuser.fuse(Pipeline.newBuilder().setComponents(components).addRequirements(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN).build());
assertThat(fused.getRunnerExecutedTransforms(), containsInAnyOrder(PipelineNode.pTransform("impulse", components.getTransformsOrThrow("impulse"))));
assertThat(fused.getFusedStages(), containsInAnyOrder(ExecutableStageMatcher.withInput("impulse.out").withOutputs("parDo.out").withTransforms("parDo"), ExecutableStageMatcher.withInput("parDo.out").withNoOutputs().withTransforms("stateful")));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class GreedyStageFuserTest method fusesCompatibleEnvironments.
@Test
public void fusesCompatibleEnvironments() {
// (impulse.out) -> parDo -> parDo.out -> window -> window.out
// parDo and window both have the environment "common" and can be fused together
PTransform parDoTransform = PTransform.newBuilder().putInputs("input", "impulse.out").putOutputs("output", "parDo.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build();
PTransform windowTransform = PTransform.newBuilder().putInputs("input", "impulse.out").putOutputs("output", "window.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN).setPayload(WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build();
QueryablePipeline p = QueryablePipeline.forPrimitivesIn(partialComponents.toBuilder().putTransforms("parDo", parDoTransform).putPcollections("parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build()).putTransforms("window", windowTransform).putPcollections("window.out", PCollection.newBuilder().setUniqueName("window.out").build()).putEnvironments("common", Environments.createDockerEnvironment("common")).build());
ExecutableStage subgraph = GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, ImmutableSet.of(PipelineNode.pTransform("parDo", parDoTransform), PipelineNode.pTransform("window", windowTransform)));
// Nothing consumes the outputs of ParDo or Window, so they don't have to be materialized
assertThat(subgraph.getOutputPCollections(), emptyIterable());
assertThat(subgraph, hasSubtransforms("parDo", "window"));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class GreedyStageFuserTest method materializesWithGroupByKeyConsumer.
@Test
public void materializesWithGroupByKeyConsumer() {
// (impulse.out) -> read -> read.out -> gbk -> gbk.out
// Fuses to
// (impulse.out) -> read -> (read.out)
// GBK is the responsibility of the runner, so it is not included in a stage.
Environment env = Environments.createDockerEnvironment("common");
PTransform readTransform = PTransform.newBuilder().putInputs("input", "impulse.out").putOutputs("output", "read.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build();
QueryablePipeline p = QueryablePipeline.forPrimitivesIn(partialComponents.toBuilder().putTransforms("read", readTransform).putPcollections("read.out", PCollection.newBuilder().setUniqueName("read.out").build()).putTransforms("gbk", PTransform.newBuilder().putInputs("input", "read.out").putOutputs("output", "gbk.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN)).build()).putPcollections("gbk.out", PCollection.newBuilder().setUniqueName("parDo.out").build()).putEnvironments("common", env).build());
PTransformNode readNode = PipelineNode.pTransform("read", readTransform);
PCollectionNode readOutput = getOnlyElement(p.getOutputPCollections(readNode));
ExecutableStage subgraph = GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, ImmutableSet.of(readNode));
assertThat(subgraph.getOutputPCollections(), contains(readOutput));
assertThat(subgraph, hasSubtransforms(readNode.getId()));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class GreedyStageFuserTest method noEnvironmentThrows.
@Test
public void noEnvironmentThrows() {
// (impulse.out) -> runnerTransform -> gbk.out
// runnerTransform can't be executed in an environment, so trying to construct it should fail
PTransform gbkTransform = PTransform.newBuilder().putInputs("input", "impulse.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN)).putOutputs("output", "gbk.out").build();
QueryablePipeline p = QueryablePipeline.forPrimitivesIn(partialComponents.toBuilder().putTransforms("runnerTransform", gbkTransform).putPcollections("gbk.out", PCollection.newBuilder().setUniqueName("gbk.out").build()).build());
thrown.expect(IllegalArgumentException.class);
thrown.expectMessage("Environment must be populated");
GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, ImmutableSet.of(PipelineNode.pTransform("runnerTransform", gbkTransform)));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
the class GreedyStageFuserTest method fusesFlatten.
@Test
public void fusesFlatten() {
// (impulse.out) -> parDo -> parDo.out --> flatten -> flatten.out -> window -> window.out
// \ /
// -> read -> read.out -
// The flatten can be executed within the same environment as any transform; the window can
// execute in the same environment as the rest of the transforms, and can fuse with the stage
PTransform readTransform = PTransform.newBuilder().putInputs("input", "impulse.out").putOutputs("output", "read.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build();
PTransform parDoTransform = PTransform.newBuilder().putInputs("input", "impulse.out").putOutputs("output", "parDo.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build();
PTransform flattenTransform = PTransform.newBuilder().putInputs("readInput", "read.out").putInputs("parDoInput", "parDo.out").putOutputs("output", "flatten.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN)).build();
PTransform windowTransform = PTransform.newBuilder().putInputs("input", "flatten.out").putOutputs("output", "window.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN).setPayload(WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("common").build();
QueryablePipeline p = QueryablePipeline.forPrimitivesIn(partialComponents.toBuilder().putTransforms("read", readTransform).putPcollections("read.out", PCollection.newBuilder().setUniqueName("read.out").build()).putTransforms("parDo", parDoTransform).putPcollections("parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build()).putTransforms("flatten", flattenTransform).putPcollections("flatten.out", PCollection.newBuilder().setUniqueName("flatten.out").build()).putTransforms("window", windowTransform).putPcollections("window.out", PCollection.newBuilder().setUniqueName("window.out").build()).putEnvironments("common", Environments.createDockerEnvironment("common")).build());
ExecutableStage subgraph = GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, p.getPerElementConsumers(impulseOutputNode));
assertThat(subgraph.getOutputPCollections(), emptyIterable());
assertThat(subgraph, hasSubtransforms("read", "parDo", "flatten", "window"));
}
Aggregations