Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
From the class ExecutableStageTest, method testRoundTripToFromTransform.
@Test
public void testRoundTripToFromTransform() throws Exception {
  Environment env =
      org.apache.beam.runners.core.construction.Environments.createDockerEnvironment("foo");
  PTransform pt =
      PTransform.newBuilder()
          .putInputs("input", "input.out")
          .putInputs("side_input", "sideInput.in")
          .putInputs("timer", "timer.out")
          .putOutputs("output", "output.out")
          .putOutputs("timer", "timer.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .putSideInputs("side_input", SideInput.getDefaultInstance())
                          .putStateSpecs("user_state", StateSpec.getDefaultInstance())
                          .putTimerFamilySpecs("timer", TimerFamilySpec.getDefaultInstance())
                          .build()
                          .toByteString()))
          .setEnvironmentId("foo")
          .build();
  PCollection input = PCollection.newBuilder().setUniqueName("input.out").build();
  PCollection sideInput = PCollection.newBuilder().setUniqueName("sideInput.in").build();
  PCollection timer = PCollection.newBuilder().setUniqueName("timer.out").build();
  PCollection output = PCollection.newBuilder().setUniqueName("output.out").build();
  Components components =
      Components.newBuilder()
          .putTransforms("pt", pt)
          .putPcollections("input.out", input)
          .putPcollections("sideInput.in", sideInput)
          .putPcollections("timer.out", timer)
          .putPcollections("output.out", output)
          .putEnvironments("foo", env)
          .build();
  PTransformNode transformNode = PipelineNode.pTransform("pt", pt);
  SideInputReference sideInputRef =
      SideInputReference.of(
          transformNode, "side_input", PipelineNode.pCollection("sideInput.in", sideInput));
  UserStateReference userStateRef =
      UserStateReference.of(
          transformNode, "user_state", PipelineNode.pCollection("input.out", input));
  TimerReference timerRef = TimerReference.of(transformNode, "timer");
  ImmutableExecutableStage stage =
      ImmutableExecutableStage.of(
          components,
          env,
          PipelineNode.pCollection("input.out", input),
          Collections.singleton(sideInputRef),
          Collections.singleton(userStateRef),
          Collections.singleton(timerRef),
          Collections.singleton(PipelineNode.pTransform("pt", pt)),
          Collections.singleton(PipelineNode.pCollection("output.out", output)),
          DEFAULT_WIRE_CODER_SETTINGS);
  PTransform stagePTransform = stage.toPTransform("foo");
  assertThat(stagePTransform.getOutputsMap(), hasValue("output.out"));
  assertThat(stagePTransform.getOutputsCount(), equalTo(1));
  assertThat(
      stagePTransform.getInputsMap(), allOf(hasValue("input.out"), hasValue("sideInput.in")));
  assertThat(stagePTransform.getInputsCount(), equalTo(2));
  ExecutableStagePayload payload =
      ExecutableStagePayload.parseFrom(stagePTransform.getSpec().getPayload());
  assertThat(payload.getTransformsList(), contains("pt"));
  assertThat(ExecutableStage.fromPayload(payload), equalTo(stage));
}
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
From the class ExecutableStageTest, method testRoundTripToFromTransformFused.
@Test
public void testRoundTripToFromTransformFused() throws Exception {
  PTransform parDoTransform =
      PTransform.newBuilder()
          .putInputs("input", "impulse.out")
          .putOutputs("output", "parDo.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();
  PTransform windowTransform =
      PTransform.newBuilder()
          .putInputs("input", "impulse.out")
          .putOutputs("output", "window.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
                  .setPayload(
                      WindowIntoPayload.newBuilder()
                          .setWindowFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();
  Components components =
      Components.newBuilder()
          .putTransforms(
              "impulse",
              PTransform.newBuilder()
                  .putOutputs("output", "impulse.out")
                  .setSpec(
                      FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN))
                  .build())
          .putPcollections(
              "impulse.out", PCollection.newBuilder().setUniqueName("impulse.out").build())
          .putTransforms("parDo", parDoTransform)
          .putPcollections(
              "parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build())
          .putTransforms("window", windowTransform)
          .putPcollections(
              "window.out", PCollection.newBuilder().setUniqueName("window.out").build())
          .putEnvironments("common", Environments.createDockerEnvironment("common"))
          .build();
  QueryablePipeline p = QueryablePipeline.forPrimitivesIn(components);
  ExecutableStage subgraph =
      GreedyStageFuser.forGrpcPortRead(
          p,
          PipelineNode.pCollection(
              "impulse.out", PCollection.newBuilder().setUniqueName("impulse.out").build()),
          ImmutableSet.of(
              PipelineNode.pTransform("parDo", parDoTransform),
              PipelineNode.pTransform("window", windowTransform)));
  PTransform ptransform = subgraph.toPTransform("foo");
  assertThat(ptransform.getSpec().getUrn(), equalTo(ExecutableStage.URN));
  assertThat(ptransform.getInputsMap().values(), containsInAnyOrder("impulse.out"));
  assertThat(ptransform.getOutputsMap().values(), emptyIterable());
  ExecutableStagePayload payload =
      ExecutableStagePayload.parseFrom(ptransform.getSpec().getPayload());
  assertThat(payload.getTransformsList(), contains("parDo", "window"));
  ExecutableStage desered = ExecutableStage.fromPayload(payload);
  assertThat(desered, equalTo(subgraph));
}
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
From the class FusedPipelineTest, method assertRootsInTopologicalOrder.
// For each transform in the root transforms, asserts that all consumed PCollections have been
// produced, and no produced PCollection has been consumed
private void assertRootsInTopologicalOrder(RunnerApi.Pipeline fusedProto) {
  Set<String> consumedPCollections = new HashSet<>();
  Set<String> producedPCollections = new HashSet<>();
  for (int i = 0; i < fusedProto.getRootTransformIdsCount(); i++) {
    PTransform rootTransform =
        fusedProto.getComponents().getTransformsOrThrow(fusedProto.getRootTransformIds(i));
    assertThat(
        String.format(
            "All %s consumed by %s must be produced before it",
            PCollection.class.getSimpleName(), fusedProto.getRootTransformIds(i)),
        producedPCollections,
        hasItems(rootTransform.getInputsMap().values().toArray(new String[0])));
    for (String consumed : consumedPCollections) {
      assertThat(
          String.format(
              "%s %s was consumed before all of its producers produced it",
              PCollection.class.getSimpleName(), consumed),
          rootTransform.getOutputsMap().values(),
          not(hasItem(consumed)));
    }
    consumedPCollections.addAll(rootTransform.getInputsMap().values());
    producedPCollections.addAll(rootTransform.getOutputsMap().values());
  }
}
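For context, a rough sketch of how this helper might be invoked from a test: the `pipelineProto` variable and the GreedyPipelineFuser.fuse call below are assumptions for illustration and are not part of this excerpt.

// Assumed usage sketch: fuse a pipeline proto and verify that the fused result
// lists its root transforms in topological order. `pipelineProto` is assumed to be
// a RunnerApi.Pipeline built elsewhere in the test (e.g. via PipelineTranslation).
RunnerApi.Pipeline fused = GreedyPipelineFuser.fuse(pipelineProto).toPipeline();
assertRootsInTopologicalOrder(fused);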
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
From the class GreedyStageFuserTest, method fusesFlattenWithDifferentEnvironmentInputs.
@Test
public void fusesFlattenWithDifferentEnvironmentInputs() {
  // (impulse.out) -> read -> read.out \
  //                                     -> flatten -> flatten.out -> window -> window.out
  // (impulse.out) -> envRead -> envRead.out /
  // fuses into
  // read -> read.out -> flatten -> flatten.out -> window -> window.out
  // envRead -> envRead.out -> flatten -> (flatten.out)
  // (flatten.out) -> window -> window.out
  PTransform readTransform =
      PTransform.newBuilder()
          .putInputs("input", "impulse.out")
          .putOutputs("output", "read.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();
  PTransform otherEnvRead =
      PTransform.newBuilder()
          .putInputs("impulse", "impulse.out")
          .putOutputs("output", "envRead.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString()))
          .setEnvironmentId("rare")
          .build();
  PTransform flattenTransform =
      PTransform.newBuilder()
          .putInputs("readInput", "read.out")
          .putInputs("otherEnvInput", "envRead.out")
          .putOutputs("output", "flatten.out")
          .setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN))
          .build();
  PTransform windowTransform =
      PTransform.newBuilder()
          .putInputs("input", "flatten.out")
          .putOutputs("output", "window.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
                  .setPayload(
                      WindowIntoPayload.newBuilder()
                          .setWindowFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();
  Components components =
      partialComponents.toBuilder()
          .putTransforms("read", readTransform)
          .putPcollections("read.out", PCollection.newBuilder().setUniqueName("read.out").build())
          .putTransforms("envRead", otherEnvRead)
          .putPcollections(
              "envRead.out", PCollection.newBuilder().setUniqueName("envRead.out").build())
          .putTransforms("flatten", flattenTransform)
          .putPcollections(
              "flatten.out", PCollection.newBuilder().setUniqueName("flatten.out").build())
          .putTransforms("window", windowTransform)
          .putPcollections(
              "window.out", PCollection.newBuilder().setUniqueName("window.out").build())
          .putEnvironments("common", Environments.createDockerEnvironment("common"))
          .putEnvironments("rare", Environments.createDockerEnvironment("rare"))
          .build();
  QueryablePipeline p = QueryablePipeline.forPrimitivesIn(components);
  ExecutableStage subgraph =
      GreedyStageFuser.forGrpcPortRead(
          p, impulseOutputNode, ImmutableSet.of(PipelineNode.pTransform("read", readTransform)));
  assertThat(subgraph.getOutputPCollections(), emptyIterable());
  assertThat(subgraph, hasSubtransforms("read", "flatten", "window"));
  // Flatten shows up in both of these subgraphs, but elements only go through a path to the
  // flatten once.
  ExecutableStage readFromOtherEnv =
      GreedyStageFuser.forGrpcPortRead(
          p, impulseOutputNode, ImmutableSet.of(PipelineNode.pTransform("envRead", otherEnvRead)));
  assertThat(
      readFromOtherEnv.getOutputPCollections(),
      contains(
          PipelineNode.pCollection(
              "flatten.out", components.getPcollectionsOrThrow("flatten.out"))));
  assertThat(readFromOtherEnv, hasSubtransforms("envRead", "flatten"));
}
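The partialComponents and impulseOutputNode fixtures used above are defined elsewhere in GreedyStageFuserTest and do not appear in this excerpt. A plausible sketch of those definitions, modeled on the impulse transform built in testRoundTripToFromTransformFused above (the exact fields are an assumption), might look like this:

// Assumed fixtures: an impulse primitive plus its output PCollection, and the
// PipelineNode.PCollectionNode used as the gRPC port read for each fused stage.
private final Components partialComponents =
    Components.newBuilder()
        .putTransforms(
            "impulse",
            PTransform.newBuilder()
                .putOutputs("output", "impulse.out")
                .setSpec(
                    FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN))
                .build())
        .putPcollections(
            "impulse.out", PCollection.newBuilder().setUniqueName("impulse.out").build())
        .build();
private final PCollectionNode impulseOutputNode =
    PipelineNode.pCollection(
        "impulse.out", partialComponents.getPcollectionsOrThrow("impulse.out"));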
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.
From the class GreedyStageFuserTest, method materializesWithConsumerWithTimer.
@Test
public void materializesWithConsumerWithTimer() {
  // (impulse.out) -> parDo -> (parDo.out)
  // (parDo.out) -> timer -> timer.out
  // timer has a timer spec which prevents it from fusing with an upstream ParDo
  PTransform parDoTransform =
      PTransform.newBuilder()
          .putInputs("input", "impulse.out")
          .putOutputs("output", "parDo.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();
  PTransform timerTransform =
      PTransform.newBuilder()
          .putInputs("input", "parDo.out")
          .putOutputs("output", "timer.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .putTimerFamilySpecs("timer", TimerFamilySpec.getDefaultInstance())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();
  QueryablePipeline p =
      QueryablePipeline.forPrimitivesIn(
          partialComponents.toBuilder()
              .putTransforms("parDo", parDoTransform)
              .putPcollections(
                  "parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build())
              .putTransforms("timer", timerTransform)
              .putPcollections(
                  "timer.out", PCollection.newBuilder().setUniqueName("timer.out").build())
              .putEnvironments("common", Environments.createDockerEnvironment("common"))
              .build());
  ExecutableStage subgraph =
      GreedyStageFuser.forGrpcPortRead(
          p, impulseOutputNode, ImmutableSet.of(PipelineNode.pTransform("parDo", parDoTransform)));
  assertThat(
      subgraph.getOutputPCollections(),
      contains(
          PipelineNode.pCollection(
              "parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build())));
  assertThat(subgraph, hasSubtransforms("parDo"));
}
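The hasSubtransforms matcher used by the two GreedyStageFuserTest methods above is likewise defined outside this excerpt. A minimal sketch of what such a Hamcrest matcher might look like, checking only the ids of the stage's fused transforms (the implementation below is an assumption, not the project's code):

// Assumed helper: matches an ExecutableStage whose fused transforms have exactly the
// given ids, ignoring order. Uses Hamcrest's TypeSafeMatcher and Description, plus
// PipelineNode.PTransformNode and java.util.stream.Collectors.
private static Matcher<ExecutableStage> hasSubtransforms(String... ids) {
  Set<String> expected = ImmutableSet.copyOf(ids);
  return new TypeSafeMatcher<ExecutableStage>() {
    @Override
    protected boolean matchesSafely(ExecutableStage stage) {
      return stage.getTransforms().stream()
          .map(PTransformNode::getId)
          .collect(Collectors.toSet())
          .equals(expected);
    }

    @Override
    public void describeTo(Description description) {
      description.appendText("an ExecutableStage with transform ids ").appendValue(expected);
    }
  };
}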