Search in sources :

Example 6 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class GreedyStageFuserTest method fusesCompatibleEnvironments.

/**
 * Two transforms that declare the same environment id ("common") and both consume impulse.out
 * are expected to end up in one fused stage with no materialized outputs.
 */
@Test
public void fusesCompatibleEnvironments() {
    // As wired below, parDo and window each read impulse.out directly:
    //   (impulse.out) -> parDo  -> parDo.out
    //   (impulse.out) -> window -> window.out
    // Both carry the environment id "common", so they can be fused together.
    FunctionSpec parDoSpec =
        FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
            .setPayload(
                ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())
            .build();
    PTransform parDoTransform =
        PTransform.newBuilder()
            .putInputs("input", "impulse.out")
            .putOutputs("output", "parDo.out")
            .setSpec(parDoSpec)
            .setEnvironmentId("common")
            .build();

    FunctionSpec windowSpec =
        FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
            .setPayload(
                WindowIntoPayload.newBuilder()
                    .setWindowFn(FunctionSpec.newBuilder())
                    .build()
                    .toByteString())
            .build();
    PTransform windowTransform =
        PTransform.newBuilder()
            .putInputs("input", "impulse.out")
            .putOutputs("output", "window.out")
            .setSpec(windowSpec)
            .setEnvironmentId("common")
            .build();

    PCollection parDoOut = PCollection.newBuilder().setUniqueName("parDo.out").build();
    PCollection windowOut = PCollection.newBuilder().setUniqueName("window.out").build();
    QueryablePipeline pipeline =
        QueryablePipeline.forPrimitivesIn(
            partialComponents
                .toBuilder()
                .putTransforms("parDo", parDoTransform)
                .putPcollections("parDo.out", parDoOut)
                .putTransforms("window", windowTransform)
                .putPcollections("window.out", windowOut)
                .putEnvironments("common", Environments.createDockerEnvironment("common"))
                .build());

    ExecutableStage stage =
        GreedyStageFuser.forGrpcPortRead(
            pipeline,
            impulseOutputNode,
            ImmutableSet.of(
                PipelineNode.pTransform("parDo", parDoTransform),
                PipelineNode.pTransform("window", windowTransform)));

    // Nothing consumes parDo.out or window.out, so neither has to be materialized.
    assertThat(stage.getOutputPCollections(), emptyIterable());
    assertThat(stage, hasSubtransforms("parDo", "window"));
}
Also used : PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 7 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class GreedyStageFuserTest method materializesWithGroupByKeyConsumer.

/**
 * Builds a read -> GroupByKey pipeline and fuses a stage starting from the read transform.
 *
 * <p>The assertions require that read.out is the stage's only output PCollection and that the
 * stage's subtransforms match the read node's id.
 */
@Test
public void materializesWithGroupByKeyConsumer() {
    // (impulse.out) -> read -> read.out -> gbk -> gbk.out
    // Fuses to
    // (impulse.out) -> read -> (read.out)
    // GBK is the responsibility of the runner, so it is not included in a stage.
    Environment env = Environments.createDockerEnvironment("common");
    PTransform readTransform =
        PTransform.newBuilder()
            .putInputs("input", "impulse.out")
            .putOutputs("output", "read.out")
            .setSpec(
                FunctionSpec.newBuilder()
                    .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                    .setPayload(
                        ParDoPayload.newBuilder()
                            .setDoFn(FunctionSpec.newBuilder())
                            .build()
                            .toByteString()))
            .setEnvironmentId("common")
            .build();
    // The GroupByKey carries no environment id; it only consumes read.out.
    PTransform gbkTransform =
        PTransform.newBuilder()
            .putInputs("input", "read.out")
            .putOutputs("output", "gbk.out")
            .setSpec(
                FunctionSpec.newBuilder().setUrn(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN))
            .build();
    QueryablePipeline p =
        QueryablePipeline.forPrimitivesIn(
            partialComponents
                .toBuilder()
                .putTransforms("read", readTransform)
                .putPcollections(
                    "read.out", PCollection.newBuilder().setUniqueName("read.out").build())
                .putTransforms("gbk", gbkTransform)
                // The unique name matches the PCollection id, as for every other PCollection
                // here (it was previously the copy-pasted "parDo.out").
                .putPcollections(
                    "gbk.out", PCollection.newBuilder().setUniqueName("gbk.out").build())
                .putEnvironments("common", env)
                .build());

    PTransformNode readNode = PipelineNode.pTransform("read", readTransform);
    PCollectionNode readOutput = getOnlyElement(p.getOutputPCollections(readNode));
    ExecutableStage subgraph =
        GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, ImmutableSet.of(readNode));

    // read.out is consumed by the GBK outside the stage, so it must be a stage output.
    assertThat(subgraph.getOutputPCollections(), contains(readOutput));
    assertThat(subgraph, hasSubtransforms(readNode.getId()));
}
Also used : PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 8 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class GreedyStageFuserTest method noEnvironmentThrows.

/**
 * Fusing a stage whose only initial node is a GroupByKey without an environment id is expected
 * to fail with an {@link IllegalArgumentException}.
 */
@Test
public void noEnvironmentThrows() {
    // (impulse.out) -> runnerTransform -> gbk.out
    // runnerTransform has no environment id set, so constructing a stage from it should fail.
    FunctionSpec gbkSpec =
        FunctionSpec.newBuilder().setUrn(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN).build();
    PTransform gbkTransform =
        PTransform.newBuilder()
            .putInputs("input", "impulse.out")
            .setSpec(gbkSpec)
            .putOutputs("output", "gbk.out")
            .build();
    PCollection gbkOut = PCollection.newBuilder().setUniqueName("gbk.out").build();
    QueryablePipeline pipeline =
        QueryablePipeline.forPrimitivesIn(
            partialComponents
                .toBuilder()
                .putTransforms("runnerTransform", gbkTransform)
                .putPcollections("gbk.out", gbkOut)
                .build());

    // Register the expectation immediately before the call that should throw.
    thrown.expect(IllegalArgumentException.class);
    thrown.expectMessage("Environment must be populated");
    GreedyStageFuser.forGrpcPortRead(
        pipeline,
        impulseOutputNode,
        ImmutableSet.of(PipelineNode.pTransform("runnerTransform", gbkTransform)));
}
Also used : PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 9 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class GreedyStageFuserTest method fusesFlatten.

/**
 * A flatten fed by two sibling ParDos, followed by a window assignment, is expected to fuse into
 * one stage together with its producers and consumer.
 */
@Test
public void fusesFlatten() {
    // (impulse.out) -> parDo -> parDo.out --> flatten -> flatten.out -> window -> window.out
    //               \                     /
    //                -> read -> read.out -
    // The flatten declares no environment of its own; read, parDo and window all use "common",
    // so every transform is expected to land in the same stage.
    FunctionSpec readSpec =
        FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
            .setPayload(
                ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())
            .build();
    PTransform readTransform =
        PTransform.newBuilder()
            .putInputs("input", "impulse.out")
            .putOutputs("output", "read.out")
            .setSpec(readSpec)
            .setEnvironmentId("common")
            .build();

    FunctionSpec parDoSpec =
        FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
            .setPayload(
                ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())
            .build();
    PTransform parDoTransform =
        PTransform.newBuilder()
            .putInputs("input", "impulse.out")
            .putOutputs("output", "parDo.out")
            .setSpec(parDoSpec)
            .setEnvironmentId("common")
            .build();

    FunctionSpec flattenSpec =
        FunctionSpec.newBuilder().setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN).build();
    PTransform flattenTransform =
        PTransform.newBuilder()
            .putInputs("readInput", "read.out")
            .putInputs("parDoInput", "parDo.out")
            .putOutputs("output", "flatten.out")
            .setSpec(flattenSpec)
            .build();

    FunctionSpec windowSpec =
        FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
            .setPayload(
                WindowIntoPayload.newBuilder()
                    .setWindowFn(FunctionSpec.newBuilder())
                    .build()
                    .toByteString())
            .build();
    PTransform windowTransform =
        PTransform.newBuilder()
            .putInputs("input", "flatten.out")
            .putOutputs("output", "window.out")
            .setSpec(windowSpec)
            .setEnvironmentId("common")
            .build();

    PCollection readOut = PCollection.newBuilder().setUniqueName("read.out").build();
    PCollection parDoOut = PCollection.newBuilder().setUniqueName("parDo.out").build();
    PCollection flattenOut = PCollection.newBuilder().setUniqueName("flatten.out").build();
    PCollection windowOut = PCollection.newBuilder().setUniqueName("window.out").build();
    QueryablePipeline pipeline =
        QueryablePipeline.forPrimitivesIn(
            partialComponents
                .toBuilder()
                .putTransforms("read", readTransform)
                .putPcollections("read.out", readOut)
                .putTransforms("parDo", parDoTransform)
                .putPcollections("parDo.out", parDoOut)
                .putTransforms("flatten", flattenTransform)
                .putPcollections("flatten.out", flattenOut)
                .putTransforms("window", windowTransform)
                .putPcollections("window.out", windowOut)
                .putEnvironments("common", Environments.createDockerEnvironment("common"))
                .build());

    // Seed the stage with every per-element consumer of impulse.out.
    ExecutableStage stage =
        GreedyStageFuser.forGrpcPortRead(
            pipeline, impulseOutputNode, pipeline.getPerElementConsumers(impulseOutputNode));

    assertThat(stage.getOutputPCollections(), emptyIterable());
    assertThat(stage, hasSubtransforms("read", "parDo", "flatten", "window"));
}
Also used : PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 10 with Environment

use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.

the class DefaultJobBundleFactoryTest method doesNotCacheDifferentEnvironments.

/**
 * Requesting stages for two distinct environments must create each environment through the
 * factory once, with no further factory interactions.
 */
@Test
public void doesNotCacheDifferentEnvironments() throws Exception {
    // A second environment, distinguished from the shared fixture by its URN.
    Environment otherEnv = Environment.newBuilder().setUrn("dummy:urn:another").build();
    RemoteEnvironment otherRemoteEnv = mock(RemoteEnvironment.class);
    InstructionRequestHandler otherHandler = mock(InstructionRequestHandler.class);

    // The fixture's instruction response is reused; its content is irrelevant to this test.
    when(otherHandler.handle(any()))
        .thenReturn(CompletableFuture.completedFuture(instructionResponse));
    when(otherRemoteEnv.getInstructionRequestHandler()).thenReturn(otherHandler);
    when(envFactory.createEnvironment(eq(otherEnv), any())).thenReturn(otherRemoteEnv);

    // Both URNs resolve to the same provider.
    Map<String, EnvironmentFactory.Provider> providersByUrn =
        ImmutableMap.of(
            environment.getUrn(), envFactoryProvider,
            otherEnv.getUrn(), envFactoryProvider);

    try (DefaultJobBundleFactory factory = createDefaultJobBundleFactory(providersByUrn)) {
        factory.forStage(getExecutableStage(environment));
        factory.forStage(getExecutableStage(otherEnv));

        verify(envFactory).createEnvironment(eq(environment), any());
        verify(envFactory).createEnvironment(eq(otherEnv), any());
        verifyNoMoreInteractions(envFactory);
    }
}
Also used : RemoteEnvironment(org.apache.beam.runners.fnexecution.environment.RemoteEnvironment) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) Matchers.containsString(org.hamcrest.Matchers.containsString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Provider(org.apache.beam.runners.fnexecution.environment.EnvironmentFactory.Provider) Test(org.junit.Test)

Aggregations

Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)33 Test (org.junit.Test)28 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)17 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)14 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)13 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)12 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)11 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)8 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)8 Map (java.util.Map)7 RemoteEnvironment (org.apache.beam.runners.fnexecution.environment.RemoteEnvironment)7 Pipeline (org.apache.beam.sdk.Pipeline)7 IOException (java.io.IOException)6 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)5 EnvironmentFactory (org.apache.beam.runners.fnexecution.environment.EnvironmentFactory)5 Provider (org.apache.beam.runners.fnexecution.environment.EnvironmentFactory.Provider)5 ServerFactory (org.apache.beam.sdk.fn.server.ServerFactory)5 InvalidProtocolBufferException (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException)5 Matchers.containsString (org.hamcrest.Matchers.containsString)5 ArrayList (java.util.ArrayList)4