use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.
the class QueryablePipelineTest method retainOnlyPrimitivesComposites.
@Test
public void retainOnlyPrimitivesComposites() {
Pipeline p = Pipeline.create();
p.apply(new org.apache.beam.sdk.transforms.PTransform<PBegin, PCollection<Long>>() {
@Override
public PCollection<Long> expand(PBegin input) {
return input.apply(Impulse.create()).apply(Window.into(FixedWindows.of(Duration.standardMinutes(5L)))).apply(MapElements.into(TypeDescriptors.longs()).via(l -> 1L));
}
});
Components originalComponents = PipelineTranslation.toProto(p).getComponents();
Collection<String> primitiveComponents = QueryablePipeline.getPrimitiveTransformIds(originalComponents);
// Read, Window.Assign, ParDo. This will need to be updated if the expansions change.
assertThat(primitiveComponents, hasSize(3));
for (String transformId : primitiveComponents) {
assertThat(originalComponents.getTransformsMap(), hasKey(transformId));
}
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.
the class QueryablePipelineTest method getEnvironmentWithEnvironment.
@Test
public void getEnvironmentWithEnvironment() {
Pipeline p = Pipeline.create();
PCollection<Long> longs = p.apply("Impulse", Impulse.create()).apply("ParDo", ParDo.of(new TestFn()));
longs.apply(WithKeys.of("a")).apply("groupByKey", GroupByKey.create());
Components components = PipelineTranslation.toProto(p).getComponents();
QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);
PTransformNode environmentalTransform = PipelineNode.pTransform("ParDo-ParMultiDo-Test-", components.getTransformsOrThrow("ParDo-ParMultiDo-Test-"));
PTransformNode nonEnvironmentalTransform = PipelineNode.pTransform("groupByKey", components.getTransformsOrThrow("groupByKey"));
assertThat(qp.getEnvironment(environmentalTransform).isPresent(), is(true));
assertThat(qp.getEnvironment(environmentalTransform).get().getUrn(), equalTo(Environments.JAVA_SDK_HARNESS_ENVIRONMENT.getUrn()));
assertThat(qp.getEnvironment(environmentalTransform).get().getPayload(), equalTo(Environments.JAVA_SDK_HARNESS_ENVIRONMENT.getPayload()));
assertThat(qp.getEnvironment(nonEnvironmentalTransform).isPresent(), is(false));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.
the class QueryablePipelineTest method getProducer.
@Test
public void getProducer() {
Pipeline p = Pipeline.create();
PCollection<Long> longs = p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
PCollectionList.of(longs).and(longs).and(longs).apply("flatten", Flatten.pCollections());
Components components = PipelineTranslation.toProto(p).getComponents();
QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);
String impulseOutputName = getOnlyElement(PipelineNode.pTransform("BoundedRead-Impulse", components.getTransformsOrThrow("BoundedRead-Impulse")).getTransform().getOutputsMap().values());
PTransformNode impulseProducer = PipelineNode.pTransform("BoundedRead-Impulse", components.getTransformsOrThrow("BoundedRead-Impulse"));
PCollectionNode impulseOutput = PipelineNode.pCollection(impulseOutputName, components.getPcollectionsOrThrow(impulseOutputName));
String flattenOutputName = getOnlyElement(PipelineNode.pTransform("flatten", components.getTransformsOrThrow("flatten")).getTransform().getOutputsMap().values());
PTransformNode flattenProducer = PipelineNode.pTransform("flatten", components.getTransformsOrThrow("flatten"));
PCollectionNode flattenOutput = PipelineNode.pCollection(flattenOutputName, components.getPcollectionsOrThrow(flattenOutputName));
assertThat(qp.getProducer(impulseOutput), equalTo(impulseProducer));
assertThat(qp.getProducer(flattenOutput), equalTo(flattenProducer));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.
the class QueryablePipelineTest method perElementConsumersWithConsumingMultipleTimes.
/**
* Tests that {@link QueryablePipeline#getPerElementConsumers(PCollectionNode)} returns a
* transform that consumes the node more than once.
*/
@Test
public void perElementConsumersWithConsumingMultipleTimes() {
Pipeline p = Pipeline.create();
PCollection<Long> longs = p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
PCollectionList.of(longs).and(longs).and(longs).apply("flatten", Flatten.pCollections());
Components components = PipelineTranslation.toProto(p).getComponents();
// This breaks if the way that IDs are assigned to PTransforms changes in PipelineTranslation
String readOutput = getOnlyElement(components.getTransformsOrThrow("BoundedRead").getOutputsMap().values());
QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);
Set<PTransformNode> consumers = qp.getPerElementConsumers(PipelineNode.pCollection(readOutput, components.getPcollectionsOrThrow(readOutput)));
assertThat(consumers.size(), equalTo(1));
assertThat(getOnlyElement(consumers).getTransform().getSpec().getUrn(), equalTo(PTransformTranslation.FLATTEN_TRANSFORM_URN));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.
the class GreedyPipelineFuserTest method sanitizedTransforms.
@Test
public void sanitizedTransforms() throws Exception {
PCollection flattenOutput = pc("flatten.out");
PCollection read1Output = pc("read1.out");
PCollection read2Output = pc("read2.out");
PCollection impulse1Output = pc("impulse1.out");
PCollection impulse2Output = pc("impulse2.out");
PTransform flattenTransform = PTransform.newBuilder().setUniqueName("Flatten").putInputs(read1Output.getUniqueName(), read1Output.getUniqueName()).putInputs(read2Output.getUniqueName(), read2Output.getUniqueName()).putOutputs(flattenOutput.getUniqueName(), flattenOutput.getUniqueName()).setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN).setPayload(WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("py").build();
PTransform read1Transform = PTransform.newBuilder().setUniqueName("read1").putInputs(impulse1Output.getUniqueName(), impulse1Output.getUniqueName()).putOutputs(read1Output.getUniqueName(), read1Output.getUniqueName()).setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("py").build();
PTransform read2Transform = PTransform.newBuilder().setUniqueName("read2").putInputs(impulse2Output.getUniqueName(), impulse2Output.getUniqueName()).putOutputs(read2Output.getUniqueName(), read2Output.getUniqueName()).setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("py").build();
PTransform impulse1Transform = PTransform.newBuilder().setUniqueName("impulse1").putOutputs(impulse1Output.getUniqueName(), impulse1Output.getUniqueName()).setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN).setPayload(WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build().toByteString())).build();
PTransform impulse2Transform = PTransform.newBuilder().setUniqueName("impulse2").putOutputs(impulse2Output.getUniqueName(), impulse2Output.getUniqueName()).setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN).setPayload(WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build().toByteString())).build();
Pipeline impulse = Pipeline.newBuilder().addRootTransformIds(impulse1Transform.getUniqueName()).addRootTransformIds(impulse2Transform.getUniqueName()).addRootTransformIds(flattenTransform.getUniqueName()).setComponents(Components.newBuilder().putCoders("coder", Coder.newBuilder().build()).putCoders("windowCoder", Coder.newBuilder().build()).putWindowingStrategies("ws", WindowingStrategy.newBuilder().setWindowCoderId("windowCoder").build()).putEnvironments("py", Environments.createDockerEnvironment("py")).putPcollections(flattenOutput.getUniqueName(), flattenOutput).putTransforms(flattenTransform.getUniqueName(), flattenTransform).putPcollections(read1Output.getUniqueName(), read1Output).putTransforms(read1Transform.getUniqueName(), read1Transform).putPcollections(read2Output.getUniqueName(), read2Output).putTransforms(read2Transform.getUniqueName(), read2Transform).putPcollections(impulse1Output.getUniqueName(), impulse1Output).putTransforms(impulse1Transform.getUniqueName(), impulse1Transform).putPcollections(impulse2Output.getUniqueName(), impulse2Output).putTransforms(impulse2Transform.getUniqueName(), impulse2Transform).build()).build();
FusedPipeline fused = GreedyPipelineFuser.fuse(impulse);
assertThat(fused.getRunnerExecutedTransforms(), hasSize(2));
assertThat(fused.getFusedStages(), hasSize(2));
assertThat(fused.getFusedStages(), containsInAnyOrder(ExecutableStageMatcher.withInput(impulse1Output.getUniqueName()).withTransforms(flattenTransform.getUniqueName(), read1Transform.getUniqueName()), ExecutableStageMatcher.withInput(impulse2Output.getUniqueName()).withTransforms(flattenTransform.getUniqueName(), read2Transform.getUniqueName())));
assertThat(fused.getFusedStages().stream().flatMap(s -> s.getComponents().getTransformsOrThrow(flattenTransform.getUniqueName()).getInputsMap().values().stream()).collect(Collectors.toList()), containsInAnyOrder(read1Output.getUniqueName(), read2Output.getUniqueName()));
}
Aggregations