use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.
the class QueryablePipelineTest method fromComponentsWithMalformedComponents.
@Test
public void fromComponentsWithMalformedComponents() {
  // The only transform declares an output ("output.out"), but no PCollection with that id is
  // ever registered in the components built below.
  FunctionSpec impulseSpec =
      FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN).build();
  PTransform rootTransform =
      PTransform.newBuilder().setSpec(impulseSpec).putOutputs("output", "output.out").build();
  Components components = Components.newBuilder().putTransforms("root", rootTransform).build();

  // Arm the expectation only once the fixture is built, right before the call under test.
  thrown.expect(IllegalArgumentException.class);
  QueryablePipeline.forPrimitivesIn(components).getComponents();
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.
the class QueryablePipelineTest method retainOnlyPrimitivesWithOnlyPrimitivesUnchanged.
@Test
public void retainOnlyPrimitivesWithOnlyPrimitivesUnchanged() {
  // Build a two-step pipeline: an Impulse followed by a multi-output ParDo.
  Pipeline pipeline = Pipeline.create();
  pipeline
      .apply("Impulse", Impulse.create())
      .apply(
          "multi-do",
          ParDo.of(new TestFn()).withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  Components originalComponents = PipelineTranslation.toProto(pipeline).getComponents();
  Collection<String> allTransformIds = originalComponents.getTransformsMap().keySet();

  // Every transform id in the translated pipeline is expected to be reported as primitive.
  Collection<String> primitiveIds = QueryablePipeline.getPrimitiveTransformIds(originalComponents);
  assertThat(primitiveIds, equalTo(allTransformIds));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.
the class QueryablePipelineTest method perElementConsumersWithConsumingMultipleTimes.
/**
 * Verifies that a transform which takes the same PCollection as input several times is reported
 * exactly once by {@link QueryablePipeline#getPerElementConsumers(PCollectionNode)}.
 */
@Test
public void perElementConsumersWithConsumingMultipleTimes() {
  Pipeline pipeline = Pipeline.create();
  PCollection<Long> longs = pipeline.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
  // Feed the very same PCollection into the flatten three times.
  PCollectionList<Long> repeatedInputs = PCollectionList.of(longs).and(longs).and(longs);
  repeatedInputs.apply("flatten", Flatten.pCollections());

  Components components = PipelineTranslation.toProto(pipeline).getComponents();
  // Looking the read up by the id "BoundedRead" relies on how PipelineTranslation assigns
  // PTransform ids; this lookup breaks if that scheme changes.
  String readOutputId =
      getOnlyElement(components.getTransformsOrThrow("BoundedRead").getOutputsMap().values());
  PCollectionNode readOutputNode =
      PipelineNode.pCollection(readOutputId, components.getPcollectionsOrThrow(readOutputId));

  QueryablePipeline queryable = QueryablePipeline.forPrimitivesIn(components);
  Set<PTransformNode> consumers = queryable.getPerElementConsumers(readOutputNode);

  assertThat(consumers.size(), equalTo(1));
  String consumerUrn = getOnlyElement(consumers).getTransform().getSpec().getUrn();
  assertThat(consumerUrn, equalTo(PTransformTranslation.FLATTEN_TRANSFORM_URN));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.
the class GreedyPipelineFuserTest method parDoWithTimerRootsStage.
/*
 * impulse -> .out -> parDo -> .out -> timer -> .out
 * becomes
 * (impulse.out) -> parDo -> (parDo.out)
 * (parDo.out) -> timer
 */
@Test
public void parDoWithTimerRootsStage() {
  // Plain ParDo: consumes impulse.out and produces parDo.out.
  ParDoPayload plainPayload = ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build();
  FunctionSpec plainSpec =
      FunctionSpec.newBuilder()
          .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
          .setPayload(plainPayload.toByteString())
          .build();
  PTransform parDoTransform =
      PTransform.newBuilder()
          .setUniqueName("ParDo")
          .putInputs("input", "impulse.out")
          .putOutputs("output", "parDo.out")
          .setSpec(plainSpec)
          .setEnvironmentId("common")
          .build();

  // ParDo with a timer family spec; the timer spec is what prevents it from fusing with the
  // upstream ParDo, so it has to root its own stage.
  ParDoPayload timerPayload =
      ParDoPayload.newBuilder()
          .setDoFn(FunctionSpec.newBuilder())
          .putTimerFamilySpecs("timer", TimerFamilySpec.getDefaultInstance())
          .build();
  FunctionSpec timerSpec =
      FunctionSpec.newBuilder()
          .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
          .setPayload(timerPayload.toByteString())
          .build();
  PTransform timerTransform =
      PTransform.newBuilder()
          .setUniqueName("TimerParDo")
          .putInputs("input", "parDo.out")
          .putInputs("timer", "timer.out")
          .putOutputs("timer", "timer.out")
          .putOutputs("output", "output.out")
          .setSpec(timerSpec)
          .setEnvironmentId("common")
          .build();

  Components components =
      partialComponents
          .toBuilder()
          .putTransforms("parDo", parDoTransform)
          .putPcollections("parDo.out", pc("parDo.out"))
          .putTransforms("timer", timerTransform)
          .putPcollections("timer.out", pc("timer.out"))
          .putPcollections("output.out", pc("output.out"))
          .putEnvironments("common", Environments.createDockerEnvironment("common"))
          .build();
  Pipeline pipeline =
      Pipeline.newBuilder()
          .setComponents(components)
          .addRequirements(ParDoTranslation.REQUIRES_STATEFUL_PROCESSING_URN)
          .build();

  FusedPipeline fused = GreedyPipelineFuser.fuse(pipeline);

  // Only the impulse is left for the runner to execute.
  assertThat(
      fused.getRunnerExecutedTransforms(),
      containsInAnyOrder(
          PipelineNode.pTransform("impulse", components.getTransformsOrThrow("impulse"))));
  // (impulse.out) -> parDo -> (parDo.out)
  // (parDo.out) -> timer
  assertThat(
      fused.getFusedStages(),
      containsInAnyOrder(
          ExecutableStageMatcher.withInput("impulse.out")
              .withOutputs("parDo.out")
              .withTransforms("parDo"),
          ExecutableStageMatcher.withInput("parDo.out").withNoOutputs().withTransforms("timer")));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.
the class GreedyPipelineFuserTest method compositesIgnored.
/*
 * impulse -> .out -> ( read -> .out --> goTransform -> .out
 *                                   \
 *                                    -> pyTransform -> .out )
 * becomes
 * (impulse.out) -> read -> (read.out)
 * (read.out) -> goTransform
 * (read.out) -> pyTransform
 */
@Test
public void compositesIgnored() {
  // The read and go transforms carry the same minimal ParDo payload.
  ParDoPayload parDoPayload = ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build();
  WindowIntoPayload windowIntoPayload =
      WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build();

  PTransform readTransform =
      PTransform.newBuilder()
          .setUniqueName("Read")
          .putInputs("input", "impulse.out")
          .putOutputs("output", "read.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(parDoPayload.toByteString()))
          .setEnvironmentId("py")
          .build();
  PTransform goTransform =
      PTransform.newBuilder()
          .setUniqueName("GoTransform")
          .putInputs("input", "read.out")
          .putOutputs("output", "go.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(parDoPayload.toByteString()))
          .setEnvironmentId("go")
          .build();
  PTransform pyTransform =
      PTransform.newBuilder()
          .setUniqueName("PyTransform")
          .putInputs("input", "read.out")
          .putOutputs("output", "py.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
                  .setPayload(windowIntoPayload.toByteString()))
          .setEnvironmentId("py")
          .build();
  // Composite wrapper: it has subtransforms but no spec and no environment of its own.
  PTransform compositeTransform =
      PTransform.newBuilder()
          .setUniqueName("CompositeMultiLang")
          .putInputs("input", "impulse.out")
          .putOutputs("pyOut", "py.out")
          .putOutputs("goOut", "go.out")
          .addSubtransforms("read")
          .addSubtransforms("goTransform")
          .addSubtransforms("pyTransform")
          .build();

  Components components =
      partialComponents
          .toBuilder()
          .putTransforms("read", readTransform)
          .putPcollections("read.out", pc("read.out"))
          .putTransforms("goTransform", goTransform)
          .putPcollections("go.out", pc("go.out"))
          .putTransforms("pyTransform", pyTransform)
          .putPcollections("py.out", pc("py.out"))
          .putTransforms("compositeMultiLang", compositeTransform)
          .build();
  Pipeline pipeline =
      Pipeline.newBuilder()
          .addRootTransformIds("impulse")
          .addRootTransformIds("compositeMultiLang")
          .setComponents(components)
          .build();

  FusedPipeline fused = GreedyPipelineFuser.fuse(pipeline);

  // Impulse is the runner transform
  assertThat(fused.getRunnerExecutedTransforms(), hasSize(1));
  assertThat(fused.getFusedStages(), hasSize(3));
  assertThat(
      fused.getFusedStages(),
      containsInAnyOrder(
          ExecutableStageMatcher.withInput("impulse.out")
              .withOutputs("read.out")
              .withTransforms("read"),
          ExecutableStageMatcher.withInput("read.out")
              .withNoOutputs()
              .withTransforms("pyTransform"),
          ExecutableStageMatcher.withInput("read.out")
              .withNoOutputs()
              .withTransforms("goTransform")));
}
Aggregations