Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
From the class GreedyStageFuserTest, method materializesWithSideInputConsumer:
@Test
public void materializesWithSideInputConsumer() {
// (impulse.out) -> read -> read.out -----------> parDo -> parDo.out -> window -> window.out
// (impulse.out) -> side_read -> side_read.out /
// Where parDo takes side_read as a side input, fuses into
// (impulse.out) -> read -> (read.out)
// (impulse.out) -> side_read -> (side_read.out)
// (read.out) -> parDo -> parDo.out -> window -> window.out
// parDo doesn't have a per-element consumer from side_read.out, so it can't root a stage
// which consumes from that materialized collection. Nodes with side inputs must root a stage,
// but do not restrict fusion of consumers.
Environment env = Environments.createDockerEnvironment("common");
PTransform readTransform =
    PTransform.newBuilder()
        .putInputs("input", "impulse.out")
        .putOutputs("output", "read.out")
        .setSpec(
            FunctionSpec.newBuilder()
                .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                .setPayload(
                    ParDoPayload.newBuilder()
                        .setDoFn(FunctionSpec.newBuilder())
                        .build()
                        .toByteString()))
        .setEnvironmentId("common")
        .build();
QueryablePipeline p =
    QueryablePipeline.forPrimitivesIn(
        partialComponents
            .toBuilder()
            .putTransforms("read", readTransform)
            .putPcollections(
                "read.out", PCollection.newBuilder().setUniqueName("read.out").build())
            .putTransforms(
                "side_read",
                PTransform.newBuilder()
                    .setSpec(
                        FunctionSpec.newBuilder()
                            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN))
                    .putInputs("input", "impulse.out")
                    .putOutputs("output", "side_read.out")
                    .build())
            .putPcollections(
                "side_read.out", PCollection.newBuilder().setUniqueName("side_read.out").build())
            .putTransforms(
                "parDo",
                PTransform.newBuilder()
                    .putInputs("input", "read.out")
                    .putInputs("side_input", "side_read.out")
                    .putOutputs("output", "parDo.out")
                    .setSpec(
                        FunctionSpec.newBuilder()
                            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                            .setPayload(
                                ParDoPayload.newBuilder()
                                    .setDoFn(FunctionSpec.newBuilder())
                                    .putSideInputs("side_input", SideInput.getDefaultInstance())
                                    .build()
                                    .toByteString()))
                    .setEnvironmentId("common")
                    .build())
            .putPcollections(
                "parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build())
            .putTransforms(
                "window",
                PTransform.newBuilder()
                    .putInputs("input", "read.out")
                    .putOutputs("output", "window.out")
                    .setSpec(
                        FunctionSpec.newBuilder()
                            .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
                            .setPayload(
                                WindowIntoPayload.newBuilder()
                                    .setWindowFn(FunctionSpec.newBuilder())
                                    .build()
                                    .toByteString()))
                    .setEnvironmentId("common")
                    .build())
            .putPcollections(
                "window.out", PCollection.newBuilder().setUniqueName("window.out").build())
            .putEnvironments("common", env)
            .build());
PTransformNode readNode = PipelineNode.pTransform("read", readTransform);
PCollectionNode readOutput = getOnlyElement(p.getOutputPCollections(readNode));
ExecutableStage subgraph = GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, ImmutableSet.of(readNode));
assertThat(subgraph.getOutputPCollections(), contains(readOutput));
assertThat(subgraph, hasSubtransforms(readNode.getId()));
}
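A hedged follow-up sketch (not part of the Beam test): the fusion boundary can be read directly off the returned stage, since every PCollection in getOutputPCollections() must be materialized so later stages, such as the side-input consumer above, can read it. The snippet reuses the subgraph value from the test; the loop variable name is illustrative.
for (PCollectionNode materialized : subgraph.getOutputPCollections()) {
  // each materialized collection becomes the input of some downstream stage
  System.out.println("must materialize: " + materialized.getId());
}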
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
From the class GreedyStageFuserTest, method materializesWithDifferentEnvConsumer:
@Test
public void materializesWithDifferentEnvConsumer() {
// (impulse.out) -> parDo -> parDo.out -> window -> window.out
// Fuses into
// (impulse.out) -> parDo -> (parDo.out)
// (parDo.out) -> window -> window.out
Environment env = Environments.createDockerEnvironment("common");
PTransform parDoTransform =
    PTransform.newBuilder()
        .putInputs("input", "impulse.out")
        .putOutputs("out", "parDo.out")
        .setSpec(
            FunctionSpec.newBuilder()
                .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                .setPayload(
                    ParDoPayload.newBuilder()
                        .setDoFn(FunctionSpec.newBuilder())
                        .build()
                        .toByteString()))
        .setEnvironmentId("common")
        .build();
PCollection parDoOutput = PCollection.newBuilder().setUniqueName("parDo.out").build();
QueryablePipeline p =
    QueryablePipeline.forPrimitivesIn(
        partialComponents
            .toBuilder()
            .putTransforms("parDo", parDoTransform)
            .putPcollections("parDo.out", parDoOutput)
            .putTransforms(
                "window",
                PTransform.newBuilder()
                    .putInputs("input", "parDo.out")
                    .putOutputs("output", "window.out")
                    .setSpec(
                        FunctionSpec.newBuilder()
                            .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
                            .setPayload(
                                WindowIntoPayload.newBuilder()
                                    .setWindowFn(FunctionSpec.newBuilder())
                                    .build()
                                    .toByteString()))
                    .setEnvironmentId("rare")
                    .build())
            .putPcollections(
                "window.out", PCollection.newBuilder().setUniqueName("window.out").build())
            .putEnvironments("rare", Environments.createDockerEnvironment("rare"))
            .putEnvironments("common", env)
            .build());
ExecutableStage subgraph = GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, p.getPerElementConsumers(impulseOutputNode));
assertThat(subgraph.getOutputPCollections(), contains(PipelineNode.pCollection("parDo.out", parDoOutput)));
assertThat(subgraph.getInputPCollection(), equalTo(impulseOutputNode));
assertThat(subgraph.getEnvironment(), equalTo(env));
assertThat(subgraph.getTransforms(), contains(PipelineNode.pTransform("parDo", parDoTransform)));
}
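A hedged sketch of why fusion stops at the window transform: an ExecutableStage is pinned to a single Environment, so a consumer whose transform declares a different environment id ("rare" above) cannot join the stage. The loop reuses the subgraph value from the test and only touches accessors already shown in it.
// every transform fused into this stage must run in the stage's single environment
for (PTransformNode fusedTransform : subgraph.getTransforms()) {
  assertThat(fusedTransform.getTransform().getEnvironmentId(), equalTo("common"));
}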
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
From the class GreedyStageFuserTest, method executableStageProducingSideInputMaterializesIt:
@Test
public void executableStageProducingSideInputMaterializesIt() {
// impulse -- ParDo(createSide)
// \_ ParDo(processMain) with side input from createSide
// The ExecutableStage executing createSide must have an output.
Environment env = Environments.createDockerEnvironment("common");
PTransform impulse =
    PTransform.newBuilder()
        .setUniqueName("impulse")
        .putOutputs("output", "impulsePC")
        .setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN))
        .build();
PTransform createSide =
    PTransform.newBuilder()
        .setUniqueName("createSide")
        .putInputs("input", "impulsePC")
        .putOutputs("output", "sidePC")
        .setSpec(
            FunctionSpec.newBuilder()
                .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                .setPayload(
                    ParDoPayload.newBuilder()
                        .setDoFn(FunctionSpec.newBuilder())
                        .build()
                        .toByteString()))
        .setEnvironmentId("common")
        .build();
PTransform processMain =
    PTransform.newBuilder()
        .setUniqueName("processMain")
        .putInputs("main", "impulsePC")
        .putInputs("side", "sidePC")
        .setSpec(
            FunctionSpec.newBuilder()
                .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                .setPayload(
                    ParDoPayload.newBuilder()
                        .setDoFn(FunctionSpec.newBuilder())
                        .putSideInputs("side", SideInput.getDefaultInstance())
                        .build()
                        .toByteString()))
        .setEnvironmentId("common")
        .build();
PCollection sidePC = PCollection.newBuilder().setUniqueName("sidePC").build();
PCollection impulsePC = PCollection.newBuilder().setUniqueName("impulsePC").build();
QueryablePipeline p =
    QueryablePipeline.forPrimitivesIn(
        partialComponents
            .toBuilder()
            .putTransforms("impulse", impulse)
            .putTransforms("createSide", createSide)
            .putTransforms("processMain", processMain)
            .putPcollections("impulsePC", impulsePC)
            .putPcollections("sidePC", sidePC)
            .putEnvironments("common", env)
            .build());
PCollectionNode impulseOutput = getOnlyElement(p.getOutputPCollections(PipelineNode.pTransform("impulse", impulse)));
ExecutableStage subgraph = GreedyStageFuser.forGrpcPortRead(p, impulseOutput, ImmutableSet.of(PipelineNode.pTransform("createSide", createSide)));
assertThat(subgraph.getOutputPCollections(), contains(PipelineNode.pCollection("sidePC", sidePC)));
}
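A hedged sketch of the distinction the fuser relies on here: processMain reads sidePC only as a side input, so sidePC has no per-element consumer, yet it still has to appear in the stage's output collections. The variable names below are illustrative and reuse the test's p and sidePC values.
PCollectionNode sideNode = PipelineNode.pCollection("sidePC", sidePC);
// side-input reads are not per-element reads, so this set does not include processMain
boolean readPerElement = !p.getPerElementConsumers(sideNode).isEmpty();
// readPerElement is false, yet sidePC is still materialized as a stage output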
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
From the class ProtoOverridesTest, method replacesMultiple:
@Test
public void replacesMultiple() {
RunnerApi.Pipeline p =
    Pipeline.newBuilder()
        .addAllRootTransformIds(ImmutableList.of("first", "second"))
        .setComponents(
            Components.newBuilder()
                .putTransforms(
                    "first",
                    PTransform.newBuilder()
                        .setSpec(FunctionSpec.newBuilder().setUrn("beam:first"))
                        .build())
                .putTransforms(
                    "second",
                    PTransform.newBuilder()
                        .setSpec(FunctionSpec.newBuilder().setUrn("beam:repeated"))
                        .build())
                .putTransforms(
                    "third",
                    PTransform.newBuilder()
                        .setSpec(FunctionSpec.newBuilder().setUrn("beam:repeated"))
                        .build())
                .putPcollections(
                    "intermediatePc", PCollection.newBuilder().setUniqueName("intermediate").build())
                .putCoders(
                    "coder", Coder.newBuilder().setSpec(FunctionSpec.getDefaultInstance()).build()))
        .build();
ByteString newPayload = ByteString.copyFrom("foo-bar-baz".getBytes(StandardCharsets.UTF_8));
Pipeline updated = ProtoOverrides.updateTransform("beam:repeated", p, (transformId, existingComponents) -> {
String subtransform = String.format("%s_sub", transformId);
return MessageWithComponents.newBuilder()
    .setPtransform(
        PTransform.newBuilder()
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn("beam:repeated:replacement")
                    .setPayload(newPayload))
            .addSubtransforms(subtransform))
    .setComponents(
        Components.newBuilder()
            .putTransforms(
                subtransform, PTransform.newBuilder().setUniqueName(subtransform).build()))
    .build();
});
PTransform updatedSecond = updated.getComponents().getTransformsOrThrow("second");
PTransform updatedThird = updated.getComponents().getTransformsOrThrow("third");
assertThat(updatedSecond, not(equalTo(p.getComponents().getTransformsOrThrow("second"))));
assertThat(updatedThird, not(equalTo(p.getComponents().getTransformsOrThrow("third"))));
assertThat(updatedSecond.getSubtransformsList(), contains("second_sub"));
assertThat(updatedSecond.getSpec().getPayload(), equalTo(newPayload));
assertThat(updatedThird.getSubtransformsList(), contains("third_sub"));
assertThat(updatedThird.getSpec().getPayload(), equalTo(newPayload));
assertThat(updated.getComponents().getTransformsMap(), hasKey("second_sub"));
assertThat(updated.getComponents().getTransformsMap(), hasKey("third_sub"));
assertThat(updated.getComponents().getTransformsOrThrow("second_sub").getUniqueName(), equalTo("second_sub"));
assertThat(updated.getComponents().getTransformsOrThrow("third_sub").getUniqueName(), equalTo("third_sub"));
}
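As a hedged sketch of the same API with a simpler replacement, the TransformReplacement callback receives the id of each matching transform plus the existing components and returns a MessageWithComponents; here the replacement only swaps the URN and registers no new components. The "beam:repeated:rewritten" URN is illustrative, and p is the pipeline built in the test above.
Pipeline rewritten =
    ProtoOverrides.updateTransform(
        "beam:repeated",
        p,
        (transformId, existingComponents) ->
            MessageWithComponents.newBuilder()
                .setPtransform(
                    existingComponents
                        .getTransformsOrThrow(transformId)
                        .toBuilder()
                        .setSpec(FunctionSpec.newBuilder().setUrn("beam:repeated:rewritten")))
                .build());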
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
From the class GreedyPipelineFuserTest, method parDoWithStateAndTimerRootsStage:
/*
* Tests that parDo with state and timers is fused correctly and can be queried
* impulse -> .out -> timer -> .out
* becomes
* (impulse.out) -> timer
*/
@Test
public void parDoWithStateAndTimerRootsStage() {
PTransform timerTransform =
    PTransform.newBuilder()
        .setUniqueName("TimerParDo")
        .putInputs("input", "impulse.out")
        .putInputs("timer", "timer.out")
        .putOutputs("timer", "timer.out")
        .putOutputs("output", "output.out")
        .setSpec(
            FunctionSpec.newBuilder()
                .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                .setPayload(
                    ParDoPayload.newBuilder()
                        .setDoFn(FunctionSpec.newBuilder())
                        .putStateSpecs("state", StateSpec.getDefaultInstance())
                        .putTimerFamilySpecs("timer", TimerFamilySpec.getDefaultInstance())
                        .build()
                        .toByteString()))
        .setEnvironmentId("common")
        .build();
Components components =
    partialComponents
        .toBuilder()
        .putTransforms("timer", timerTransform)
        .putPcollections("timer.out", pc("timer.out"))
        .putPcollections("output.out", pc("output.out"))
        .putEnvironments("common", Environments.createDockerEnvironment("common"))
        .build();
FusedPipeline fused =
    GreedyPipelineFuser.fuse(
        Pipeline.newBuilder()
            .setComponents(components)
            .addRequirements(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN)
            .build());
assertThat(
    fused.getRunnerExecutedTransforms(),
    containsInAnyOrder(
        PipelineNode.pTransform("impulse", components.getTransformsOrThrow("impulse"))));
assertThat(
    fused.getFusedStages(),
    contains(
        ExecutableStageMatcher.withInput("impulse.out").withNoOutputs().withTransforms("timer")));
}
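A hedged sketch of how the fusion result is typically consumed: the runner itself executes the transforms in getRunnerExecutedTransforms() (here the impulse primitive) and hands each ExecutableStage to an SDK harness. It reuses the fused value from the test; the printed labels are illustrative.
for (PTransformNode runnerTransform : fused.getRunnerExecutedTransforms()) {
  System.out.println("runner executes: " + runnerTransform.getId());
}
for (ExecutableStage stage : fused.getFusedStages()) {
  // each stage reads one materialized PCollection and runs in a single environment
  System.out.println("stage rooted at: " + stage.getInputPCollection().getId());
}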