use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.
the class GreedyStageFuserTest method fusesCompatibleEnvironments.
/**
 * Checks that transforms declaring the same environment end up in a single fused stage.
 *
 * <p>Both transforms are handed to the fuser as initial consumers of {@code impulse.out}.
 */
@Test
public void fusesCompatibleEnvironments() {
  // As wired below, both transforms read impulse.out directly:
  //   (impulse.out) -> parDo  -> parDo.out
  //                \-> window -> window.out
  // parDo and window both have the environment "common" and can be fused together.
  FunctionSpec parDoSpec =
      FunctionSpec.newBuilder()
          .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
          .setPayload(
              ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())
          .build();
  FunctionSpec windowSpec =
      FunctionSpec.newBuilder()
          .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
          .setPayload(
              WindowIntoPayload.newBuilder()
                  .setWindowFn(FunctionSpec.newBuilder())
                  .build()
                  .toByteString())
          .build();

  PTransform parDo =
      PTransform.newBuilder()
          .putInputs("input", "impulse.out")
          .putOutputs("output", "parDo.out")
          .setSpec(parDoSpec)
          .setEnvironmentId("common")
          .build();
  PTransform window =
      PTransform.newBuilder()
          .putInputs("input", "impulse.out")
          .putOutputs("output", "window.out")
          .setSpec(windowSpec)
          .setEnvironmentId("common")
          .build();

  PCollection parDoOut = PCollection.newBuilder().setUniqueName("parDo.out").build();
  PCollection windowOut = PCollection.newBuilder().setUniqueName("window.out").build();

  QueryablePipeline pipeline =
      QueryablePipeline.forPrimitivesIn(
          partialComponents
              .toBuilder()
              .putTransforms("parDo", parDo)
              .putPcollections("parDo.out", parDoOut)
              .putTransforms("window", window)
              .putPcollections("window.out", windowOut)
              .putEnvironments("common", Environments.createDockerEnvironment("common"))
              .build());

  ExecutableStage stage =
      GreedyStageFuser.forGrpcPortRead(
          pipeline,
          impulseOutputNode,
          ImmutableSet.of(
              PipelineNode.pTransform("parDo", parDo),
              PipelineNode.pTransform("window", window)));

  // Nothing consumes the outputs of ParDo or Window, so they don't have to be materialized.
  assertThat(stage.getOutputPCollections(), emptyIterable());
  assertThat(stage, hasSubtransforms("parDo", "window"));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.
the class GreedyStageFuserTest method materializesWithGroupByKeyConsumer.
/**
 * Checks that a PCollection consumed by a GroupByKey is reported as a stage output and that the
 * GroupByKey itself is left out of the fused stage.
 */
@Test
public void materializesWithGroupByKeyConsumer() {
  // (impulse.out) -> read -> read.out -> gbk -> gbk.out
  // Fuses to
  // (impulse.out) -> read -> (read.out)
  // GBK is the responsibility of the runner, so it is not included in a stage.
  Environment env = Environments.createDockerEnvironment("common");

  PTransform readTransform =
      PTransform.newBuilder()
          .putInputs("input", "impulse.out")
          .putOutputs("output", "read.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();
  // The GBK carries no environment id; only the read transform is bound to "common".
  PTransform gbkTransform =
      PTransform.newBuilder()
          .putInputs("input", "read.out")
          .putOutputs("output", "gbk.out")
          .setSpec(
              FunctionSpec.newBuilder().setUrn(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN))
          .build();

  QueryablePipeline p =
      QueryablePipeline.forPrimitivesIn(
          partialComponents
              .toBuilder()
              .putTransforms("read", readTransform)
              .putPcollections(
                  "read.out", PCollection.newBuilder().setUniqueName("read.out").build())
              .putTransforms("gbk", gbkTransform)
              // The unique name mirrors the component id, as for every other PCollection here
              // (it was previously the unrelated name "parDo.out").
              .putPcollections(
                  "gbk.out", PCollection.newBuilder().setUniqueName("gbk.out").build())
              .putEnvironments("common", env)
              .build());

  PTransformNode readNode = PipelineNode.pTransform("read", readTransform);
  PCollectionNode readOutput = getOnlyElement(p.getOutputPCollections(readNode));

  ExecutableStage subgraph =
      GreedyStageFuser.forGrpcPortRead(p, impulseOutputNode, ImmutableSet.of(readNode));

  // read.out feeds the GBK, which is outside the stage, so it must be a stage output.
  assertThat(subgraph.getOutputPCollections(), contains(readOutput));
  assertThat(subgraph, hasSubtransforms(readNode.getId()));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.
the class GreedyStageFuserTest method noEnvironmentThrows.
/**
 * Checks that rooting a stage at a transform without an environment is rejected with an
 * {@link IllegalArgumentException}.
 */
@Test
public void noEnvironmentThrows() {
  // (impulse.out) -> runnerTransform -> gbk.out
  // runnerTransform is a GroupByKey with no environment id set, so building a stage from it
  // is expected to fail.
  FunctionSpec gbkSpec =
      FunctionSpec.newBuilder().setUrn(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN).build();
  PTransform runnerOnlyTransform =
      PTransform.newBuilder()
          .putInputs("input", "impulse.out")
          .setSpec(gbkSpec)
          .putOutputs("output", "gbk.out")
          .build();
  PCollection gbkOut = PCollection.newBuilder().setUniqueName("gbk.out").build();

  QueryablePipeline pipeline =
      QueryablePipeline.forPrimitivesIn(
          partialComponents
              .toBuilder()
              .putTransforms("runnerTransform", runnerOnlyTransform)
              .putPcollections("gbk.out", gbkOut)
              .build());

  // Expectations must be registered before the call that is supposed to throw.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Environment must be populated");

  GreedyStageFuser.forGrpcPortRead(
      pipeline,
      impulseOutputNode,
      ImmutableSet.of(PipelineNode.pTransform("runnerTransform", runnerOnlyTransform)));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.
the class GreedyStageFuserTest method fusesFlatten.
/**
 * Checks that a Flatten (which declares no environment) and its downstream consumer are fused
 * into the same stage as the transforms feeding the Flatten.
 */
@Test
public void fusesFlatten() {
  // (impulse.out) -> parDo -> parDo.out --> flatten -> flatten.out -> window -> window.out
  //               \                      /
  //                -> read -> read.out --
  // The flatten has no environment of its own; read, parDo and window all use "common", and
  // the test expects all four transforms to land in one stage.
  FunctionSpec parDoSpec =
      FunctionSpec.newBuilder()
          .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
          .setPayload(
              ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())
          .build();
  FunctionSpec flattenSpec =
      FunctionSpec.newBuilder().setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN).build();
  FunctionSpec windowSpec =
      FunctionSpec.newBuilder()
          .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
          .setPayload(
              WindowIntoPayload.newBuilder()
                  .setWindowFn(FunctionSpec.newBuilder())
                  .build()
                  .toByteString())
          .build();

  PTransform read =
      PTransform.newBuilder()
          .putInputs("input", "impulse.out")
          .putOutputs("output", "read.out")
          .setSpec(parDoSpec)
          .setEnvironmentId("common")
          .build();
  PTransform parDo =
      PTransform.newBuilder()
          .putInputs("input", "impulse.out")
          .putOutputs("output", "parDo.out")
          .setSpec(parDoSpec)
          .setEnvironmentId("common")
          .build();
  PTransform flatten =
      PTransform.newBuilder()
          .putInputs("readInput", "read.out")
          .putInputs("parDoInput", "parDo.out")
          .putOutputs("output", "flatten.out")
          .setSpec(flattenSpec)
          .build();
  PTransform window =
      PTransform.newBuilder()
          .putInputs("input", "flatten.out")
          .putOutputs("output", "window.out")
          .setSpec(windowSpec)
          .setEnvironmentId("common")
          .build();

  QueryablePipeline pipeline =
      QueryablePipeline.forPrimitivesIn(
          partialComponents
              .toBuilder()
              .putTransforms("read", read)
              .putPcollections(
                  "read.out", PCollection.newBuilder().setUniqueName("read.out").build())
              .putTransforms("parDo", parDo)
              .putPcollections(
                  "parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build())
              .putTransforms("flatten", flatten)
              .putPcollections(
                  "flatten.out", PCollection.newBuilder().setUniqueName("flatten.out").build())
              .putTransforms("window", window)
              .putPcollections(
                  "window.out", PCollection.newBuilder().setUniqueName("window.out").build())
              .putEnvironments("common", Environments.createDockerEnvironment("common"))
              .build());

  // Seed the stage with every per-element consumer of impulse.out.
  ExecutableStage stage =
      GreedyStageFuser.forGrpcPortRead(
          pipeline, impulseOutputNode, pipeline.getPerElementConsumers(impulseOutputNode));

  assertThat(stage.getOutputPCollections(), emptyIterable());
  assertThat(stage, hasSubtransforms("read", "parDo", "flatten", "window"));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Environment in project beam by apache.
the class DefaultJobBundleFactoryTest method doesNotCacheDifferentEnvironments.
/**
 * Checks that requesting stages for two different environments makes the bundle factory ask the
 * environment factory for each of them, and for nothing else.
 */
@Test
public void doesNotCacheDifferentEnvironments() throws Exception {
  Environment otherEnv = Environment.newBuilder().setUrn("dummy:urn:another").build();

  // Mocks backing the second environment.
  RemoteEnvironment otherRemoteEnv = mock(RemoteEnvironment.class);
  InstructionRequestHandler otherHandler = mock(InstructionRequestHandler.class);

  // Both URNs are served by the same provider.
  Map<String, EnvironmentFactory.Provider> providersByUrn =
      ImmutableMap.of(
          environment.getUrn(), envFactoryProvider,
          otherEnv.getUrn(), envFactoryProvider);

  when(envFactory.createEnvironment(eq(otherEnv), any())).thenReturn(otherRemoteEnv);
  when(otherRemoteEnv.getInstructionRequestHandler()).thenReturn(otherHandler);
  // Don't bother creating a distinct instruction response because we don't use it here.
  when(otherHandler.handle(any()))
      .thenReturn(CompletableFuture.completedFuture(instructionResponse));

  try (DefaultJobBundleFactory factory = createDefaultJobBundleFactory(providersByUrn)) {
    factory.forStage(getExecutableStage(environment));
    factory.forStage(getExecutableStage(otherEnv));

    // One creation per distinct environment, and no further calls on the factory.
    verify(envFactory).createEnvironment(eq(environment), any());
    verify(envFactory).createEnvironment(eq(otherEnv), any());
    verifyNoMoreInteractions(envFactory);
  }
}
Aggregations