Search in sources :

Example 46 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class QueryablePipelineTest method retainOnlyPrimitivesComposites.

/**
 * Wraps three steps in an anonymous composite transform, translates the pipeline to proto, and
 * checks that {@code QueryablePipeline.getPrimitiveTransformIds} yields exactly three IDs, each of
 * which is a key of the translated transforms map.
 */
@Test
public void retainOnlyPrimitivesComposites() {
    Pipeline p = Pipeline.create();
    // Composite under test: Impulse -> fixed five-minute windows -> map every element to 1L.
    org.apache.beam.sdk.transforms.PTransform<PBegin, PCollection<Long>> composite = new org.apache.beam.sdk.transforms.PTransform<PBegin, PCollection<Long>>() {

        @Override
        public PCollection<Long> expand(PBegin input) {
            return input
                .apply(Impulse.create())
                .apply(Window.into(FixedWindows.of(Duration.standardMinutes(5L))))
                .apply(MapElements.into(TypeDescriptors.longs()).via(l -> 1L));
        }
    };
    p.apply(composite);

    Components originalComponents = PipelineTranslation.toProto(p).getComponents();
    Collection<String> primitiveComponents = QueryablePipeline.getPrimitiveTransformIds(originalComponents);

    // Three primitives are expected, one per step applied inside the composite (the source step
    // here is Impulse). This count will need to be updated if the expansions change.
    assertThat(primitiveComponents, hasSize(3));
    // Every reported primitive ID must refer to a transform that exists in the components.
    primitiveComponents.forEach(transformId -> assertThat(originalComponents.getTransformsMap(), hasKey(transformId)));
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PCollection(org.apache.beam.sdk.values.PCollection) PBegin(org.apache.beam.sdk.values.PBegin) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 47 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class QueryablePipelineTest method getEnvironmentWithEnvironment.

/**
 * Checks that {@code QueryablePipeline.getEnvironment} returns an environment matching
 * {@code Environments.JAVA_SDK_HARNESS_ENVIRONMENT} (URN and payload) for the ParDo transform, and
 * returns an empty result for the GroupByKey transform.
 */
@Test
public void getEnvironmentWithEnvironment() {
    Pipeline p = Pipeline.create();
    PCollection<Long> parDoOutput = p
        .apply("Impulse", Impulse.create())
        .apply("ParDo", ParDo.of(new TestFn()));
    parDoOutput
        .apply(WithKeys.of("a"))
        .apply("groupByKey", GroupByKey.create());

    Components components = PipelineTranslation.toProto(p).getComponents();
    QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);

    // Transform IDs as they appear in the translated components.
    String parDoId = "ParDo-ParMultiDo-Test-";
    String groupByKeyId = "groupByKey";
    PTransformNode environmentalTransform = PipelineNode.pTransform(parDoId, components.getTransformsOrThrow(parDoId));
    PTransformNode nonEnvironmentalTransform = PipelineNode.pTransform(groupByKeyId, components.getTransformsOrThrow(groupByKeyId));

    // The ParDo must report an environment, and it must be the Java SDK harness one.
    assertThat(qp.getEnvironment(environmentalTransform).isPresent(), is(true));
    assertThat(
        qp.getEnvironment(environmentalTransform).get().getUrn(),
        equalTo(Environments.JAVA_SDK_HARNESS_ENVIRONMENT.getUrn()));
    assertThat(
        qp.getEnvironment(environmentalTransform).get().getPayload(),
        equalTo(Environments.JAVA_SDK_HARNESS_ENVIRONMENT.getPayload()));
    // The GroupByKey must not report any environment.
    assertThat(qp.getEnvironment(nonEnvironmentalTransform).isPresent(), is(false));
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 48 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class QueryablePipelineTest method getProducer.

/**
 * Checks that {@code QueryablePipeline.getProducer} maps the single output PCollection of the
 * "BoundedRead-Impulse" transform and of the "flatten" transform back to the respective
 * transform node.
 */
@Test
public void getProducer() {
    Pipeline p = Pipeline.create();
    PCollection<Long> longs = p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
    PCollectionList.of(longs).and(longs).and(longs).apply("flatten", Flatten.pCollections());

    Components components = PipelineTranslation.toProto(p).getComponents();
    QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);

    // Transform IDs as they appear in the translated components.
    String impulseId = "BoundedRead-Impulse";
    String flattenId = "flatten";

    // Build each producer node once and derive its single output PCollection from it.
    PTransformNode impulseProducer = PipelineNode.pTransform(impulseId, components.getTransformsOrThrow(impulseId));
    String impulseOutputName = getOnlyElement(impulseProducer.getTransform().getOutputsMap().values());
    PCollectionNode impulseOutput = PipelineNode.pCollection(impulseOutputName, components.getPcollectionsOrThrow(impulseOutputName));

    PTransformNode flattenProducer = PipelineNode.pTransform(flattenId, components.getTransformsOrThrow(flattenId));
    String flattenOutputName = getOnlyElement(flattenProducer.getTransform().getOutputsMap().values());
    PCollectionNode flattenOutput = PipelineNode.pCollection(flattenOutputName, components.getPcollectionsOrThrow(flattenOutputName));

    assertThat(qp.getProducer(impulseOutput), equalTo(impulseProducer));
    assertThat(qp.getProducer(flattenOutput), equalTo(flattenProducer));
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 49 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class QueryablePipelineTest method perElementConsumersWithConsumingMultipleTimes.

/**
 * Tests that {@link QueryablePipeline#getPerElementConsumers(PCollectionNode)} reports a
 * transform only once even when that transform consumes the same node several times: the read
 * output is fed into one flatten three times, and exactly one (flatten) consumer is expected.
 */
@Test
public void perElementConsumersWithConsumingMultipleTimes() {
    Pipeline p = Pipeline.create();
    PCollection<Long> longs = p.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
    // The same PCollection is supplied to the flatten three times.
    PCollectionList.of(longs).and(longs).and(longs).apply("flatten", Flatten.pCollections());

    Components components = PipelineTranslation.toProto(p).getComponents();
    QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components);

    // This breaks if the way that IDs are assigned to PTransforms changes in PipelineTranslation
    String readOutput = getOnlyElement(components.getTransformsOrThrow("BoundedRead").getOutputsMap().values());
    PCollectionNode readOutputNode = PipelineNode.pCollection(readOutput, components.getPcollectionsOrThrow(readOutput));

    Set<PTransformNode> consumers = qp.getPerElementConsumers(readOutputNode);

    assertThat(consumers.size(), equalTo(1));
    PTransformNode onlyConsumer = getOnlyElement(consumers);
    assertThat(onlyConsumer.getTransform().getSpec().getUrn(), equalTo(PTransformTranslation.FLATTEN_TRANSFORM_URN));
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 50 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class GreedyPipelineFuserTest method sanitizedTransforms.

/**
 * Fuses a hand-built proto pipeline of two impulses, two reads (ParDo URN) and one flatten that
 * takes both read outputs, then checks the fused result: two runner-executed transforms, two
 * fused stages (one per impulse output, each holding the flatten plus one read), and flatten
 * inputs across the stages that together are exactly the two read outputs.
 */
@Test
public void sanitizedTransforms() throws Exception {
    PCollection flattenOutput = pc("flatten.out");
    PCollection read1Output = pc("read1.out");
    PCollection read2Output = pc("read2.out");
    PCollection impulse1Output = pc("impulse1.out");
    PCollection impulse2Output = pc("impulse2.out");

    // PCollection names double as both the local tag and the component ID below.
    String flattenOutName = flattenOutput.getUniqueName();
    String read1OutName = read1Output.getUniqueName();
    String read2OutName = read2Output.getUniqueName();
    String impulse1OutName = impulse1Output.getUniqueName();
    String impulse2OutName = impulse2Output.getUniqueName();

    PTransform flattenTransform = PTransform.newBuilder()
        .setUniqueName("Flatten")
        .putInputs(read1OutName, read1OutName)
        .putInputs(read2OutName, read2OutName)
        .putOutputs(flattenOutName, flattenOutName)
        .setSpec(FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN)
            .setPayload(WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build().toByteString()))
        .setEnvironmentId("py")
        .build();
    PTransform read1Transform = PTransform.newBuilder()
        .setUniqueName("read1")
        .putInputs(impulse1OutName, impulse1OutName)
        .putOutputs(read1OutName, read1OutName)
        .setSpec(FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
            .setPayload(WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build().toByteString()))
        .setEnvironmentId("py")
        .build();
    PTransform read2Transform = PTransform.newBuilder()
        .setUniqueName("read2")
        .putInputs(impulse2OutName, impulse2OutName)
        .putOutputs(read2OutName, read2OutName)
        .setSpec(FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
            .setPayload(WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build().toByteString()))
        .setEnvironmentId("py")
        .build();
    // The impulses carry no environment ID.
    PTransform impulse1Transform = PTransform.newBuilder()
        .setUniqueName("impulse1")
        .putOutputs(impulse1OutName, impulse1OutName)
        .setSpec(FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN)
            .setPayload(WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build().toByteString()))
        .build();
    PTransform impulse2Transform = PTransform.newBuilder()
        .setUniqueName("impulse2")
        .putOutputs(impulse2OutName, impulse2OutName)
        .setSpec(FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN)
            .setPayload(WindowIntoPayload.newBuilder().setWindowFn(FunctionSpec.newBuilder()).build().toByteString()))
        .build();

    // Transform unique names are used as the transform component IDs.
    String flattenId = flattenTransform.getUniqueName();
    String read1Id = read1Transform.getUniqueName();
    String read2Id = read2Transform.getUniqueName();
    String impulse1Id = impulse1Transform.getUniqueName();
    String impulse2Id = impulse2Transform.getUniqueName();

    Components components = Components.newBuilder()
        .putCoders("coder", Coder.newBuilder().build())
        .putCoders("windowCoder", Coder.newBuilder().build())
        .putWindowingStrategies("ws", WindowingStrategy.newBuilder().setWindowCoderId("windowCoder").build())
        .putEnvironments("py", Environments.createDockerEnvironment("py"))
        .putPcollections(flattenOutName, flattenOutput)
        .putTransforms(flattenId, flattenTransform)
        .putPcollections(read1OutName, read1Output)
        .putTransforms(read1Id, read1Transform)
        .putPcollections(read2OutName, read2Output)
        .putTransforms(read2Id, read2Transform)
        .putPcollections(impulse1OutName, impulse1Output)
        .putTransforms(impulse1Id, impulse1Transform)
        .putPcollections(impulse2OutName, impulse2Output)
        .putTransforms(impulse2Id, impulse2Transform)
        .build();
    Pipeline pipeline = Pipeline.newBuilder()
        .addRootTransformIds(impulse1Id)
        .addRootTransformIds(impulse2Id)
        .addRootTransformIds(flattenId)
        .setComponents(components)
        .build();

    FusedPipeline fused = GreedyPipelineFuser.fuse(pipeline);

    assertThat(fused.getRunnerExecutedTransforms(), hasSize(2));
    assertThat(fused.getFusedStages(), hasSize(2));
    // Each stage is rooted at one impulse output and contains the flatten plus the matching read.
    assertThat(
        fused.getFusedStages(),
        containsInAnyOrder(
            ExecutableStageMatcher.withInput(impulse1OutName).withTransforms(flattenId, read1Id),
            ExecutableStageMatcher.withInput(impulse2OutName).withTransforms(flattenId, read2Id)));
    // Collected over all stages, the flatten's inputs are exactly the two read outputs.
    assertThat(
        fused.getFusedStages().stream()
            .flatMap(s -> s.getComponents().getTransformsOrThrow(flattenId).getInputsMap().values().stream())
            .collect(Collectors.toList()),
        containsInAnyOrder(read1OutName, read2OutName));
}
Also used : PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Pipeline(org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline) Test(org.junit.Test)

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)45 Test (org.junit.Test)45 Pipeline (org.apache.beam.sdk.Pipeline)25 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)24 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)22 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)22 Map (java.util.Map)21 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)21 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)21 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)17 ArrayList (java.util.ArrayList)16 HashMap (java.util.HashMap)14 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)13 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)13 PCollection (org.apache.beam.sdk.values.PCollection)12 Coder (org.apache.beam.sdk.coders.Coder)11 KV (org.apache.beam.sdk.values.KV)11 Collection (java.util.Collection)10 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)10 IOException (java.io.IOException)9