Use of org.apache.beam.sdk.common.runner.v1.RunnerApi.Components in the Apache Beam project.
Example: class DataflowPipelineTranslatorTest, method testPortablePipelineContainsExpectedDependenciesAndCapabilities.
@Test
public void testPortablePipelineContainsExpectedDependenciesAndCapabilities() throws Exception {
  // Translate a trivial Impulse -> Map -> Window pipeline with the portable
  // (beam_fn_api) experiment enabled.
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(Arrays.asList("beam_fn_api"));
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply(Impulse.create())
      .apply(
          MapElements.via(
              new SimpleFunction<byte[], String>() {
                @Override
                public String apply(byte[] input) {
                  return "";
                }
              }))
      .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))));
  runner.replaceV1Transforms(pipeline);

  // Stand-in artifact files that will be registered as environment dependencies.
  File file1 = File.createTempFile("file1-", ".txt");
  file1.deleteOnExit();
  File file2 = File.createTempFile("file2-", ".txt");
  file2.deleteOnExit();
  String dependencySpec1 = "file1.txt=" + file1;
  String dependencySpec2 = "file2.txt=" + file2;

  SdkComponents sdkComponents = SdkComponents.create();
  sdkComponents.registerEnvironment(
      Environments.createDockerEnvironment(DataflowRunner.getContainerImageForJob(options))
          .toBuilder()
          .addAllDependencies(
              Environments.getArtifacts(ImmutableList.of(dependencySpec1, dependencySpec2)))
          .addAllCapabilities(Environments.getJavaCapabilities())
          .build());

  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  JobSpecification result =
      translator.translate(
          pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList());
  Components translatedComponents = result.getPipelineProto().getComponents();

  // The single registered environment must carry both the Java SDK capabilities
  // and the two staged dependencies.
  assertThat(
      Iterables.getOnlyElement(translatedComponents.getEnvironmentsMap().values())
          .getCapabilitiesList(),
      containsInAnyOrder(Environments.getJavaCapabilities().toArray(new String[0])));
  assertThat(
      Iterables.getOnlyElement(translatedComponents.getEnvironmentsMap().values())
          .getDependenciesList(),
      containsInAnyOrder(
          Environments.getArtifacts(ImmutableList.of(dependencySpec1, dependencySpec2))
              .toArray(new ArtifactInformation[0])));
}
Use of org.apache.beam.sdk.common.runner.v1.RunnerApi.Components in the Apache Beam project.
Example: class GreedyStageFuserTest, method flattenWithHeterogeneousInputsAndOutputs.
@Test
public void flattenWithHeterogeneousInputsAndOutputs() {
  // (impulse.out) -> pyRead -> pyRead.out \ -> pyParDo -> pyParDo.out
  // (impulse.out) -> -> flatten -> flatten.out |
  // (impulse.out) -> goRead -> goRead.out / -> goWindow -> goWindow.out
  // fuses into
  // (impulse.out) -> pyRead -> pyRead.out -> flatten -> (flatten.out)
  // (impulse.out) -> goRead -> goRead.out -> flatten -> (flatten.out)
  // (flatten.out) -> pyParDo -> pyParDo.out
  // (flatten.out) -> goWindow -> goWindow.out
  PTransform pyReadTransform =
      PTransform.newBuilder()
          .putInputs("input", "impulse.out")
          .putOutputs("output", "pyRead.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString())
                  .build())
          .setEnvironmentId("py")
          .build();
  PTransform goReadTransform =
      PTransform.newBuilder()
          .putInputs("input", "impulse.out")
          .putOutputs("output", "goRead.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString())
                  .build())
          .setEnvironmentId("go")
          .build();
  PTransform pyParDoTransform =
      PTransform.newBuilder()
          .putInputs("input", "flatten.out")
          .putOutputs("output", "pyParDo.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString())
                  .build())
          .setEnvironmentId("py")
          .build();
  PTransform goWindowTransform =
      PTransform.newBuilder()
          .putInputs("input", "flatten.out")
          .putOutputs("output", "goWindow.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
                  .setPayload(
                      WindowIntoPayload.newBuilder()
                          .setWindowFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString())
                  .build())
          .setEnvironmentId("go")
          .build();
  PCollection flattenOutput = PCollection.newBuilder().setUniqueName("flatten.out").build();
  PTransform flattenTransform =
      PTransform.newBuilder()
          .putInputs("py_input", "pyRead.out")
          .putInputs("go_input", "goRead.out")
          .putOutputs("output", "flatten.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN)
                  .build())
          .build();
  Components components =
      partialComponents
          .toBuilder()
          .putTransforms("pyRead", pyReadTransform)
          .putPcollections(
              "pyRead.out", PCollection.newBuilder().setUniqueName("pyRead.out").build())
          .putTransforms("goRead", goReadTransform)
          .putPcollections(
              "goRead.out", PCollection.newBuilder().setUniqueName("goRead.out").build())
          .putTransforms("flatten", flattenTransform)
          .putPcollections("flatten.out", flattenOutput)
          .putTransforms("pyParDo", pyParDoTransform)
          .putPcollections(
              "pyParDo.out", PCollection.newBuilder().setUniqueName("pyParDo.out").build())
          .putTransforms("goWindow", goWindowTransform)
          .putPcollections(
              "goWindow.out", PCollection.newBuilder().setUniqueName("goWindow.out").build())
          .putEnvironments("go", Environments.createDockerEnvironment("go"))
          .putEnvironments("py", Environments.createDockerEnvironment("py"))
          .build();
  QueryablePipeline queryablePipeline = QueryablePipeline.forPrimitivesIn(components);
  ExecutableStage readFromPy =
      GreedyStageFuser.forGrpcPortRead(
          queryablePipeline,
          impulseOutputNode,
          ImmutableSet.of(PipelineNode.pTransform("pyRead", pyReadTransform)));
  ExecutableStage readFromGo =
      GreedyStageFuser.forGrpcPortRead(
          queryablePipeline,
          impulseOutputNode,
          ImmutableSet.of(PipelineNode.pTransform("goRead", goReadTransform)));
  assertThat(
      readFromPy.getOutputPCollections(),
      contains(PipelineNode.pCollection("flatten.out", flattenOutput)));
  // The stage must materialize the flatten, so the `go` stage can read it; this means that this
  // parDo can't be in the stage, as it'll be a reader of that materialized PCollection. The same
  // is true for the go window.
  assertThat(
      readFromPy.getTransforms(),
      not(hasItem(PipelineNode.pTransform("pyParDo", pyParDoTransform))));
  assertThat(
      readFromGo.getOutputPCollections(),
      contains(PipelineNode.pCollection("flatten.out", flattenOutput)));
  assertThat(
      readFromGo.getTransforms(),
      not(hasItem(PipelineNode.pTransform("goWindow", goWindowTransform))));
}
Use of org.apache.beam.sdk.common.runner.v1.RunnerApi.Components in the Apache Beam project.
Example: class ProtoOverridesTest, method replacesOnlyMatching.
@Test
public void replacesOnlyMatching() {
  // A two-root pipeline; only the transform whose spec URN is "beam:second"
  // should be rewritten by the override.
  Components.Builder originalComponents =
      Components.newBuilder()
          .putTransforms(
              "first",
              PTransform.newBuilder()
                  .setSpec(FunctionSpec.newBuilder().setUrn("beam:first"))
                  .build())
          .putTransforms(
              "second",
              PTransform.newBuilder()
                  .setSpec(FunctionSpec.newBuilder().setUrn("beam:second"))
                  .build())
          .putPcollections(
              "intermediatePc", PCollection.newBuilder().setUniqueName("intermediate").build())
          .putCoders("coder", Coder.newBuilder().setSpec(FunctionSpec.getDefaultInstance()).build());
  RunnerApi.Pipeline originalPipeline =
      Pipeline.newBuilder()
          .addAllRootTransformIds(ImmutableList.of("first", "second"))
          .setComponents(originalComponents)
          .build();
  PTransform secondReplacement =
      PTransform.newBuilder()
          .addSubtransforms("second_sub")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn("beam:second:replacement")
                  .setPayload(ByteString.copyFrom("foo-bar-baz".getBytes(StandardCharsets.UTF_8))))
          .build();
  WindowingStrategy introducedWindowing =
      WindowingStrategy.newBuilder()
          .setAccumulationMode(AccumulationMode.Enum.ACCUMULATING)
          .build();
  RunnerApi.Components extraComponents =
      Components.newBuilder()
          .putPcollections(
              "intermediatePc",
              PCollection.newBuilder().setUniqueName("intermediate_replacement").build())
          .putWindowingStrategies("new_ws", introducedWindowing)
          .putTransforms("second_sub", PTransform.getDefaultInstance())
          .build();
  Pipeline updated =
      ProtoOverrides.updateTransform(
          "beam:second", originalPipeline, new TestReplacer(secondReplacement, extraComponents));
  Components updatedComponents = updated.getComponents();
  assertThat(updatedComponents.getTransformsOrThrow("second"), equalTo(secondReplacement));
  assertThat(
      updatedComponents.getWindowingStrategiesOrThrow("new_ws"), equalTo(introducedWindowing));
  assertThat(
      updatedComponents.getTransformsOrThrow("second_sub"),
      equalTo(PTransform.getDefaultInstance()));
  // TODO: This might not be appropriate. Merging in the other direction might force that callers
  // are well behaved.
  assertThat(
      updatedComponents.getPcollectionsOrThrow("intermediatePc").getUniqueName(),
      equalTo("intermediate_replacement"));
  // Assert that the untouched components are unchanged.
  assertThat(
      updatedComponents.getTransformsOrThrow("first"),
      equalTo(originalPipeline.getComponents().getTransformsOrThrow("first")));
  assertThat(
      updatedComponents.getCodersOrThrow("coder"),
      equalTo(originalPipeline.getComponents().getCodersOrThrow("coder")));
  assertThat(
      updated.getRootTransformIdsList(), equalTo(originalPipeline.getRootTransformIdsList()));
}
Use of org.apache.beam.sdk.common.runner.v1.RunnerApi.Components in the Apache Beam project.
Example: class ExecutableStageTest, method testRoundTripToFromTransform.
@Test
public void testRoundTripToFromTransform() throws Exception {
  // Build a single ParDo with a side input, user state, and a timer, then verify
  // that converting the stage to a PTransform and parsing it back is lossless.
  Environment env =
      org.apache.beam.runners.core.construction.Environments.createDockerEnvironment("foo");
  PTransform transform =
      PTransform.newBuilder()
          .putInputs("input", "input.out")
          .putInputs("side_input", "sideInput.in")
          .putInputs("timer", "timer.out")
          .putOutputs("output", "output.out")
          .putOutputs("timer", "timer.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .putSideInputs("side_input", SideInput.getDefaultInstance())
                          .putStateSpecs("user_state", StateSpec.getDefaultInstance())
                          .putTimerFamilySpecs("timer", TimerFamilySpec.getDefaultInstance())
                          .build()
                          .toByteString()))
          .setEnvironmentId("foo")
          .build();
  PCollection mainInput = PCollection.newBuilder().setUniqueName("input.out").build();
  PCollection sideInputPc = PCollection.newBuilder().setUniqueName("sideInput.in").build();
  PCollection timerPc = PCollection.newBuilder().setUniqueName("timer.out").build();
  PCollection mainOutput = PCollection.newBuilder().setUniqueName("output.out").build();
  Components components =
      Components.newBuilder()
          .putTransforms("pt", transform)
          .putPcollections("input.out", mainInput)
          .putPcollections("sideInput.in", sideInputPc)
          .putPcollections("timer.out", timerPc)
          .putPcollections("output.out", mainOutput)
          .putEnvironments("foo", env)
          .build();
  PTransformNode transformNode = PipelineNode.pTransform("pt", transform);
  SideInputReference sideInputRef =
      SideInputReference.of(
          transformNode, "side_input", PipelineNode.pCollection("sideInput.in", sideInputPc));
  UserStateReference userStateRef =
      UserStateReference.of(
          transformNode, "user_state", PipelineNode.pCollection("input.out", mainInput));
  TimerReference timerRef = TimerReference.of(transformNode, "timer");
  ImmutableExecutableStage stage =
      ImmutableExecutableStage.of(
          components,
          env,
          PipelineNode.pCollection("input.out", mainInput),
          Collections.singleton(sideInputRef),
          Collections.singleton(userStateRef),
          Collections.singleton(timerRef),
          Collections.singleton(PipelineNode.pTransform("pt", transform)),
          Collections.singleton(PipelineNode.pCollection("output.out", mainOutput)),
          DEFAULT_WIRE_CODER_SETTINGS);
  PTransform stagePTransform = stage.toPTransform("foo");
  assertThat(stagePTransform.getOutputsMap(), hasValue("output.out"));
  assertThat(stagePTransform.getOutputsCount(), equalTo(1));
  assertThat(stagePTransform.getInputsMap(), allOf(hasValue("input.out"), hasValue("sideInput.in")));
  assertThat(stagePTransform.getInputsCount(), equalTo(2));
  ExecutableStagePayload payload =
      ExecutableStagePayload.parseFrom(stagePTransform.getSpec().getPayload());
  assertThat(payload.getTransformsList(), contains("pt"));
  assertThat(ExecutableStage.fromPayload(payload), equalTo(stage));
}
Use of org.apache.beam.sdk.common.runner.v1.RunnerApi.Components in the Apache Beam project.
Example: class ExecutableStageTest, method testRoundTripToFromTransformFused.
@Test
public void testRoundTripToFromTransformFused() throws Exception {
  // Fuse impulse -> {parDo, window} into one stage, serialize the stage payload,
  // and confirm deserialization reproduces the same stage.
  PTransform parDoTransform =
      PTransform.newBuilder()
          .putInputs("input", "impulse.out")
          .putOutputs("output", "parDo.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)
                  .setPayload(
                      ParDoPayload.newBuilder()
                          .setDoFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();
  PTransform windowTransform =
      PTransform.newBuilder()
          .putInputs("input", "impulse.out")
          .putOutputs("output", "window.out")
          .setSpec(
              FunctionSpec.newBuilder()
                  .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
                  .setPayload(
                      WindowIntoPayload.newBuilder()
                          .setWindowFn(FunctionSpec.newBuilder())
                          .build()
                          .toByteString()))
          .setEnvironmentId("common")
          .build();
  Components components =
      Components.newBuilder()
          .putTransforms(
              "impulse",
              PTransform.newBuilder()
                  .putOutputs("output", "impulse.out")
                  .setSpec(
                      FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN))
                  .build())
          .putPcollections(
              "impulse.out", PCollection.newBuilder().setUniqueName("impulse.out").build())
          .putTransforms("parDo", parDoTransform)
          .putPcollections(
              "parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build())
          .putTransforms("window", windowTransform)
          .putPcollections(
              "window.out", PCollection.newBuilder().setUniqueName("window.out").build())
          .putEnvironments("common", Environments.createDockerEnvironment("common"))
          .build();
  QueryablePipeline queryablePipeline = QueryablePipeline.forPrimitivesIn(components);
  ExecutableStage subgraph =
      GreedyStageFuser.forGrpcPortRead(
          queryablePipeline,
          PipelineNode.pCollection(
              "impulse.out", PCollection.newBuilder().setUniqueName("impulse.out").build()),
          ImmutableSet.of(
              PipelineNode.pTransform("parDo", parDoTransform),
              PipelineNode.pTransform("window", windowTransform)));
  PTransform stageTransform = subgraph.toPTransform("foo");
  assertThat(stageTransform.getSpec().getUrn(), equalTo(ExecutableStage.URN));
  assertThat(stageTransform.getInputsMap().values(), containsInAnyOrder("impulse.out"));
  assertThat(stageTransform.getOutputsMap().values(), emptyIterable());
  ExecutableStagePayload payload =
      ExecutableStagePayload.parseFrom(stageTransform.getSpec().getPayload());
  assertThat(payload.getTransformsList(), contains("parDo", "window"));
  ExecutableStage roundTripped = ExecutableStage.fromPayload(payload);
  assertThat(roundTripped, equalTo(subgraph));
}
Aggregations