Search in sources :

Example 1 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class GreedyStageFuserTest method sideInputIncludedInStage.

/**
 * Fuses a stage that reads {@code read.out} with {@code parDo} as its only initial node, where
 * {@code parDo} additionally declares {@code side_read.out} as a side input.
 *
 * <p>Expects the resulting stage to list exactly that side input and to have no output
 * PCollections.
 */
@Test
public void sideInputIncludedInStage() {
    Environment commonEnv = Environments.createDockerEnvironment("common");

    // Producer of the main input; consumes impulse.out, which is not declared in this method
    // (presumably supplied by the partialComponents fixture -- verify against the test class).
    PTransform readTransform =
        PTransform.newBuilder()
            .setUniqueName("read")
            .putInputs("input", "impulse.out")
            .putOutputs("output", "read.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();

    // Consumer under test: main input from read.out, side input from side_read.out.
    PTransform parDoTransform =
        PTransform.newBuilder()
            .setUniqueName("parDo")
            .putInputs("input", "read.out")
            .putInputs("side_input", "side_read.out")
            .putOutputs("output", "parDo.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .putSideInputs("side_input", SideInput.getDefaultInstance())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();

    // Producer of the side input; it has a ParDo URN but no payload and no environment id.
    PTransform sideReadTransform =
        PTransform.newBuilder()
            .setUniqueName("side_read")
            .setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN))
            .putInputs("input", "impulse.out")
            .putOutputs("output", "side_read.out")
            .build();

    PCollection readOutPCollection = PCollection.newBuilder().setUniqueName("read.out").build();
    PCollection sideInputPCollection =
        PCollection.newBuilder().setUniqueName("side_read.out").build();
    PCollection parDoOutPCollection = PCollection.newBuilder().setUniqueName("parDo.out").build();

    QueryablePipeline pipeline =
        QueryablePipeline.forPrimitivesIn(
            partialComponents
                .toBuilder()
                .putTransforms("read", readTransform)
                .putPcollections("read.out", readOutPCollection)
                .putTransforms("side_read", sideReadTransform)
                .putPcollections("side_read.out", sideInputPCollection)
                .putTransforms("parDo", parDoTransform)
                .putPcollections("parDo.out", parDoOutPCollection)
                .putEnvironments("common", commonEnv)
                .build());

    PTransformNode readNode = PipelineNode.pTransform("read", readTransform);
    PTransformNode parDoNode = PipelineNode.pTransform("parDo", parDoTransform);
    PCollectionNode readOutput = getOnlyElement(pipeline.getOutputPCollections(readNode));

    ExecutableStage stage =
        GreedyStageFuser.forGrpcPortRead(pipeline, readOutput, ImmutableSet.of(parDoNode));

    PCollectionNode sideInputNode = PipelineNode.pCollection("side_read.out", sideInputPCollection);
    assertThat(
        stage.getSideInputs(),
        contains(SideInputReference.of(parDoNode, "side_input", sideInputNode)));
    assertThat(stage.getOutputPCollections(), emptyIterable());
}
Also used : PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 2 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class GreedyStageFuserTest method materializesWithSideInputConsumer.

/**
 * Fuses a stage rooted at {@code impulseOutputNode} with {@code read} as its only initial node,
 * in a pipeline where {@code read.out} is consumed by a ParDo that also takes a side input.
 *
 * <p>Expects {@code read.out} to be the stage's only output PCollection and the stage to satisfy
 * {@code hasSubtransforms(read)}.
 */
@Test
public void materializesWithSideInputConsumer() {
    // Intended shape, as described by the original author:
    //   (impulse.out) -> read -> read.out -----------> parDo -> parDo.out -> window -> window.out
    //   (impulse.out) -> side_read -> side_read.out /
    // with parDo taking side_read.out as a side input, expected to fuse into
    //   (impulse.out) -> read -> (read.out)
    //   (impulse.out) -> side_read -> (side_read.out)
    //   (read.out) -> parDo -> parDo.out -> window -> window.out
    // parDo has no per-element consumer relationship with side_read.out, so it cannot root a
    // stage that consumes from that materialized collection. Nodes with side inputs must root a
    // stage, but they do not restrict fusion of their consumers.
    //
    // NOTE(review): the "window" transform below is wired to "read.out", not "parDo.out" as the
    // diagram shows. The assertions only inspect the stage containing "read", so this may be
    // harmless -- confirm which of the two is intended.
    Environment commonEnv = Environments.createDockerEnvironment("common");

    PTransform readTransform =
        PTransform.newBuilder()
            .putInputs("input", "impulse.out")
            .putOutputs("output", "read.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();

    // Side-input producer; ParDo URN only, with no payload and no environment id.
    PTransform sideReadTransform =
        PTransform.newBuilder()
            .setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN))
            .putInputs("input", "impulse.out")
            .putOutputs("output", "side_read.out")
            .build();

    // Consumer of read.out that also declares side_read.out as a side input.
    PTransform parDoTransform =
        PTransform.newBuilder()
            .putInputs("input", "read.out")
            .putInputs("side_input", "side_read.out")
            .putOutputs("output", "parDo.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .putSideInputs("side_input", SideInput.getDefaultInstance())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();

    PTransform windowTransform =
        PTransform.newBuilder()
            .putInputs("input", "read.out")
            .putOutputs("output", "window.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
                    .setPayload(
                        WindowIntoPayload.newBuilder()
                            .setWindowFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();

    QueryablePipeline pipeline =
        QueryablePipeline.forPrimitivesIn(
            partialComponents
                .toBuilder()
                .putTransforms("read", readTransform)
                .putPcollections(
                    "read.out", PCollection.newBuilder().setUniqueName("read.out").build())
                .putTransforms("side_read", sideReadTransform)
                .putPcollections(
                    "side_read.out",
                    PCollection.newBuilder().setUniqueName("side_read.out").build())
                .putTransforms("parDo", parDoTransform)
                .putPcollections(
                    "parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build())
                .putTransforms("window", windowTransform)
                .putPcollections(
                    "window.out", PCollection.newBuilder().setUniqueName("window.out").build())
                .putEnvironments("common", commonEnv)
                .build());

    PTransformNode readNode = PipelineNode.pTransform("read", readTransform);
    PCollectionNode readOutput = getOnlyElement(pipeline.getOutputPCollections(readNode));

    ExecutableStage stage =
        GreedyStageFuser.forGrpcPortRead(pipeline, impulseOutputNode, ImmutableSet.of(readNode));

    assertThat(stage.getOutputPCollections(), contains(readOutput));
    assertThat(stage, hasSubtransforms(readNode.getId()));
}
Also used : PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 3 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class GreedyStageFuserTest method materializesWithDifferentEnvConsumer.

/**
 * Fuses a stage rooted at {@code impulseOutputNode} where the only downstream consumer of
 * {@code parDo.out} is assigned to a different environment than {@code parDo}.
 *
 * <p>Expects the stage to contain only {@code parDo}, to run in the "common" environment, to read
 * from {@code impulseOutputNode}, and to expose {@code parDo.out} as its only output.
 */
@Test
public void materializesWithDifferentEnvConsumer() {
    // Pipeline shape:
    //   (impulse.out) -> parDo -> parDo.out -> window -> window.out
    // Expected fusion:
    //   (impulse.out) -> parDo -> (parDo.out)
    //   (parDo.out) -> window -> window.out
    Environment commonEnv = Environments.createDockerEnvironment("common");

    PTransform parDoTransform =
        PTransform.newBuilder()
            .putInputs("input", "impulse.out")
            .putOutputs("out", "parDo.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();
    PCollection parDoOutput = PCollection.newBuilder().setUniqueName("parDo.out").build();

    // The downstream consumer is placed in the "rare" environment rather than "common".
    PTransform windowTransform =
        PTransform.newBuilder()
            .putInputs("input", "parDo.out")
            .putOutputs("output", "window.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
                    .setPayload(
                        WindowIntoPayload.newBuilder()
                            .setWindowFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString()))
            .setEnvironmentId("rare")
            .build();

    QueryablePipeline pipeline =
        QueryablePipeline.forPrimitivesIn(
            partialComponents
                .toBuilder()
                .putTransforms("parDo", parDoTransform)
                .putPcollections("parDo.out", parDoOutput)
                .putTransforms("window", windowTransform)
                .putPcollections(
                    "window.out", PCollection.newBuilder().setUniqueName("window.out").build())
                .putEnvironments("rare", Environments.createDockerEnvironment("rare"))
                .putEnvironments("common", commonEnv)
                .build());

    // Seed the stage with every per-element consumer of the impulse output.
    ExecutableStage stage =
        GreedyStageFuser.forGrpcPortRead(
            pipeline, impulseOutputNode, pipeline.getPerElementConsumers(impulseOutputNode));

    assertThat(
        stage.getOutputPCollections(),
        contains(PipelineNode.pCollection("parDo.out", parDoOutput)));
    assertThat(stage.getInputPCollection(), equalTo(impulseOutputNode));
    assertThat(stage.getEnvironment(), equalTo(commonEnv));
    assertThat(
        stage.getTransforms(), contains(PipelineNode.pTransform("parDo", parDoTransform)));
}
Also used : PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 4 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class GreedyStageFuserTest method executableStageProducingSideInputMaterializesIt.

/**
 * Fuses a stage containing {@code createSide}, whose output {@code sidePC} is consumed only as a
 * side input by {@code processMain}.
 *
 * <p>Expects {@code sidePC} to be the stage's only output PCollection.
 */
@Test
public void executableStageProducingSideInputMaterializesIt() {
    // impulse -- ParDo(createSide)
    //         \_ ParDo(processMain), taking createSide's output as a side input
    // The ExecutableStage that executes createSide must therefore have an output.
    Environment commonEnv = Environments.createDockerEnvironment("common");

    PTransform impulse =
        PTransform.newBuilder()
            .setUniqueName("impulse")
            .putOutputs("output", "impulsePC")
            .setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN))
            .build();

    PTransform createSide =
        PTransform.newBuilder()
            .setUniqueName("createSide")
            .putInputs("input", "impulsePC")
            .putOutputs("output", "sidePC")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();

    // processMain reads impulsePC as its main input and sidePC as a side input; it declares no
    // outputs.
    PTransform processMain =
        PTransform.newBuilder()
            .setUniqueName("processMain")
            .putInputs("main", "impulsePC")
            .putInputs("side", "sidePC")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .putSideInputs("side", SideInput.getDefaultInstance())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();

    PCollection sidePC = PCollection.newBuilder().setUniqueName("sidePC").build();
    PCollection impulsePC = PCollection.newBuilder().setUniqueName("impulsePC").build();

    QueryablePipeline pipeline =
        QueryablePipeline.forPrimitivesIn(
            partialComponents
                .toBuilder()
                .putTransforms("impulse", impulse)
                .putTransforms("createSide", createSide)
                .putTransforms("processMain", processMain)
                .putPcollections("impulsePC", impulsePC)
                .putPcollections("sidePC", sidePC)
                .putEnvironments("common", commonEnv)
                .build());

    PCollectionNode impulseOutput =
        getOnlyElement(
            pipeline.getOutputPCollections(PipelineNode.pTransform("impulse", impulse)));

    // Only createSide seeds the stage.
    ExecutableStage stage =
        GreedyStageFuser.forGrpcPortRead(
            pipeline,
            impulseOutput,
            ImmutableSet.of(PipelineNode.pTransform("createSide", createSide)));

    assertThat(stage.getOutputPCollections(), contains(PipelineNode.pCollection("sidePC", sidePC)));
}
Also used : PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 5 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class FusedPipelineTest method testToProto.

/**
 * Builds an impulse -> map -> key -> gbk -> values pipeline, fuses it with
 * {@code GreedyPipelineFuser}, and checks the pipeline proto produced by
 * {@code FusedPipeline.toPipeline()}.
 *
 * <p>The {@code checkState} calls guard the test's own preconditions (the shape of the unfused
 * proto and of the fusion result); the {@code assertThat} calls verify the converted proto.
 */
@Test
public void testToProto() {
    Pipeline p = Pipeline.create();
    p.apply("impulse", Impulse.create())
        .apply("map", MapElements.into(TypeDescriptors.integers()).via(bytes -> bytes.length))
        .apply("key", WithKeys.of("foo"))
        .apply("gbk", GroupByKey.create())
        .apply("values", Values.create());

    RunnerApi.Pipeline protoPipeline = PipelineTranslation.toProto(p);
    checkState(
        protoPipeline
            .getRootTransformIdsList()
            .containsAll(ImmutableList.of("impulse", "map", "key", "gbk", "values")),
        "Unexpected Root Transform IDs %s",
        protoPipeline.getRootTransformIdsList());

    FusedPipeline fused = GreedyPipelineFuser.fuse(protoPipeline);
    checkState(
        fused.getRunnerExecutedTransforms().size() == 2,
        "Unexpected number of runner transforms %s",
        fused.getRunnerExecutedTransforms());
    checkState(
        fused.getFusedStages().size() == 2,
        "Unexpected number of fused stages %s",
        fused.getFusedStages());

    RunnerApi.Pipeline fusedPipelineProto = fused.toPipeline();
    RunnerApi.Components fusedComponents = fusedPipelineProto.getComponents();
    RunnerApi.Components originalComponents = protoPipeline.getComponents();

    // Root transform ids: every root must exist in the components, and there must be four roots
    // including "impulse" and "gbk".
    assertThat(
        "Root Transforms should all be present in the Pipeline Components",
        fusedComponents.getTransformsMap().keySet(),
        hasItems(fusedPipelineProto.getRootTransformIdsList().toArray(new String[0])));
    assertThat(
        "Should contain Impulse, GroupByKey, and two Environment Stages",
        fusedPipelineProto.getRootTransformIdsCount(),
        equalTo(4));
    assertThat(fusedPipelineProto.getRootTransformIdsList(), hasItems("impulse", "gbk"));
    assertRootsInTopologicalOrder(fusedPipelineProto);

    // MapElements, WithKeys, and Values are each composites wrapping a ParDo, so match on the id
    // prefix rather than inspecting their expansions.
    assertThat(
        "Fused transforms should be present in the components",
        fusedComponents.getTransformsMap(),
        allOf(hasKey(startsWith("map")), hasKey(startsWith("key")), hasKey(startsWith("values"))));
    assertThat(
        "Fused transforms shouldn't be present in the root IDs",
        fusedPipelineProto.getRootTransformIdsList(),
        not(hasItems(startsWith("map"), startsWith("key"), startsWith("values"))));

    // All remaining component maps must equal those of the original pipeline.
    assertThat(fusedComponents.getCodersMap(), equalTo(originalComponents.getCodersMap()));
    assertThat(
        fusedComponents.getWindowingStrategiesMap(),
        equalTo(originalComponents.getWindowingStrategiesMap()));
    assertThat(
        fusedComponents.getEnvironmentsMap(), equalTo(originalComponents.getEnvironmentsMap()));
    assertThat(
        fusedComponents.getPcollectionsMap(), equalTo(originalComponents.getPcollectionsMap()));
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)33 Test (org.junit.Test)28 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)17 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)14 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)13 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)12 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)11 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)8 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)8 Map (java.util.Map)7 RemoteEnvironment (org.apache.beam.runners.fnexecution.environment.RemoteEnvironment)7 Pipeline (org.apache.beam.sdk.Pipeline)7 IOException (java.io.IOException)6 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)5 EnvironmentFactory (org.apache.beam.runners.fnexecution.environment.EnvironmentFactory)5 Provider (org.apache.beam.runners.fnexecution.environment.EnvironmentFactory.Provider)5 ServerFactory (org.apache.beam.sdk.fn.server.ServerFactory)5 InvalidProtocolBufferException (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException)5 Matchers.containsString (org.hamcrest.Matchers.containsString)5 ArrayList (java.util.ArrayList)4