Search in sources :

Example 56 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class ParDoTranslation method getSideInputs.

public static List<PCollectionView<?>> getSideInputs(AppliedPTransform<?, ?, ?> application) throws IOException {
    PTransform<?, ?> transform = application.getTransform();
    if (transform instanceof ParDo.MultiOutput) {
        return ((ParDo.MultiOutput<?, ?>) transform).getSideInputs().values().stream().collect(Collectors.toList());
    }
    SdkComponents sdkComponents = SdkComponents.create(application.getPipeline().getOptions());
    RunnerApi.PTransform parDoProto = PTransformTranslation.toProto(application, sdkComponents);
    ParDoPayload payload = ParDoPayload.parseFrom(parDoProto.getSpec().getPayload());
    List<PCollectionView<?>> views = new ArrayList<>();
    RehydratedComponents components = RehydratedComponents.forComponents(sdkComponents.toComponents());
    for (Map.Entry<String, SideInput> sideInputEntry : payload.getSideInputsMap().entrySet()) {
        String sideInputTag = sideInputEntry.getKey();
        RunnerApi.SideInput sideInput = sideInputEntry.getValue();
        PCollection<?> originalPCollection = checkNotNull((PCollection<?>) application.getInputs().get(new TupleTag<>(sideInputTag)), "no input with tag %s", sideInputTag);
        views.add(PCollectionViewTranslation.viewFromProto(sideInput, sideInputTag, originalPCollection, parDoProto, components));
    }
    return views;
}
Also used : ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) SideInput(org.apache.beam.model.pipeline.v1.RunnerApi.SideInput) ArrayList(java.util.ArrayList) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PCollectionView(org.apache.beam.sdk.values.PCollectionView) ParDo(org.apache.beam.sdk.transforms.ParDo) Map(java.util.Map) HashMap(java.util.HashMap) MultiOutput(org.apache.beam.sdk.transforms.ParDo.MultiOutput) SideInput(org.apache.beam.model.pipeline.v1.RunnerApi.SideInput)

Example 57 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class WindowingStrategyTranslation method toProto.

/**
 * Converts a {@link WindowingStrategy} into a {@link RunnerApi.WindowingStrategy}, registering
 * any components in the provided {@link SdkComponents}.
 */
public static RunnerApi.WindowingStrategy toProto(WindowingStrategy<?, ?> windowingStrategy, SdkComponents components) throws IOException {
    WindowFn<?, ?> windowFn = windowingStrategy.getWindowFn();
    FunctionSpec windowFnSpec = toProto(windowFn, components);
    String environmentId = Strings.isNullOrEmpty(windowingStrategy.getEnvironmentId()) ? components.getEnvironmentIdFor(ResourceHints.create()) : windowingStrategy.getEnvironmentId();
    RunnerApi.WindowingStrategy.Builder windowingStrategyProto = RunnerApi.WindowingStrategy.newBuilder().setOutputTime(toProto(windowingStrategy.getTimestampCombiner())).setAccumulationMode(toProto(windowingStrategy.getMode())).setClosingBehavior(toProto(windowingStrategy.getClosingBehavior())).setAllowedLateness(windowingStrategy.getAllowedLateness().getMillis()).setTrigger(TriggerTranslation.toProto(windowingStrategy.getTrigger())).setWindowFn(windowFnSpec).setAssignsToOneWindow(windowFn.assignsToOneWindow()).setMergeStatus(windowFn.isNonMerging() ? MergeStatus.Enum.NON_MERGING : (windowingStrategy.isAlreadyMerged() ? MergeStatus.Enum.ALREADY_MERGED : MergeStatus.Enum.NEEDS_MERGE)).setOnTimeBehavior(toProto(windowingStrategy.getOnTimeBehavior())).setWindowCoderId(components.registerCoder(windowFn.windowCoder())).setEnvironmentId(environmentId);
    return windowingStrategyProto.build();
}
Also used : FunctionSpec(org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy)

Example 58 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class GreedyPipelineFuserTest method transformsWithNoEnvironmentBecomeRunnerExecuted.

/*
   * impulse -> .out -> mystery -> .out
   *                 \
   *                  -> enigma -> .out
   * becomes all runner-executed
   */
@Test
public void transformsWithNoEnvironmentBecomeRunnerExecuted() {
    Components components = partialComponents.toBuilder().putTransforms("mystery", PTransform.newBuilder().setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)).setUniqueName("Mystery").putInputs("input", "impulse.out").putOutputs("output", "mystery.out").build()).putPcollections("mystery.out", pc("mystery.out")).putTransforms("enigma", PTransform.newBuilder().setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN)).setUniqueName("Enigma").putInputs("input", "impulse.out").putOutputs("output", "enigma.out").build()).putPcollections("enigma.out", pc("enigma.out")).build();
    FusedPipeline fused = GreedyPipelineFuser.fuse(Pipeline.newBuilder().setComponents(components).build());
    assertThat(fused.getRunnerExecutedTransforms(), containsInAnyOrder(PipelineNode.pTransform("impulse", components.getTransformsOrThrow("impulse")), PipelineNode.pTransform("mystery", components.getTransformsOrThrow("mystery")), PipelineNode.pTransform("enigma", components.getTransformsOrThrow("enigma"))));
    assertThat(fused.getFusedStages(), emptyIterable());
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) Test(org.junit.Test)

Example 59 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class GreedyPipelineFuserTest method flattenWithHeterogeneousInputsSingleEnvOutputPartiallyMaterialized.

/*
   * impulseA -> .out -> goRead -> .out \
   *                                     -> flatten -> .out -> goParDo -> .out
   * impulseB -> .out -> pyRead -> .out /
   *
   * becomes
   * (impulseA.out) -> goRead -> goRead.out -> flatten -> flatten.out -> goParDo
   * (impulseB.out) -> pyRead -> pyRead.out -> flatten -> (flatten.out)
   * (flatten.out) -> goParDo
   */
@Test
public void flattenWithHeterogeneousInputsSingleEnvOutputPartiallyMaterialized() {
    Components components = Components.newBuilder().putCoders("coder", Coder.newBuilder().build()).putCoders("windowCoder", Coder.newBuilder().build()).putWindowingStrategies("ws", WindowingStrategy.newBuilder().setWindowCoderId("windowCoder").build()).putTransforms("pyImpulse", PTransform.newBuilder().setUniqueName("PyImpulse").putOutputs("output", "pyImpulse.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN)).build()).putPcollections("pyImpulse.out", pc("pyImpulse.out")).putTransforms("pyRead", PTransform.newBuilder().setUniqueName("PyRead").putInputs("input", "pyImpulse.out").putOutputs("output", "pyRead.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("py").build()).putPcollections("pyRead.out", pc("pyRead.out")).putTransforms("goImpulse", PTransform.newBuilder().setUniqueName("GoImpulse").putOutputs("output", "goImpulse.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN)).build()).putPcollections("goImpulse.out", pc("goImpulse.out")).putTransforms("goRead", PTransform.newBuilder().setUniqueName("GoRead").putInputs("input", "goImpulse.out").putOutputs("output", "goRead.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("go").build()).putPcollections("goRead.out", pc("goRead.out")).putTransforms("flatten", PTransform.newBuilder().setUniqueName("Flatten").putInputs("goReadInput", "goRead.out").putInputs("pyReadInput", "pyRead.out").putOutputs("output", "flatten.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.FLATTEN_TRANSFORM_URN)).build()).putPcollections("flatten.out", pc("flatten.out")).putTransforms("goParDo", PTransform.newBuilder().setUniqueName("GoParDo").putInputs("input", "flatten.out").putOutputs("output", "goParDo.out").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(FunctionSpec.newBuilder()).build().toByteString())).setEnvironmentId("go").build()).putPcollections("goParDo.out", pc("goParDo.out")).putEnvironments("go", Environments.createDockerEnvironment("go")).putEnvironments("py", Environments.createDockerEnvironment("py")).build();
    FusedPipeline fused = GreedyPipelineFuser.fuse(Pipeline.newBuilder().setComponents(components).build());
    assertThat(fused.getRunnerExecutedTransforms(), containsInAnyOrder(PipelineNode.pTransform("pyImpulse", components.getTransformsOrThrow("pyImpulse")), PipelineNode.pTransform("goImpulse", components.getTransformsOrThrow("goImpulse"))));
    assertThat(fused.getFusedStages(), containsInAnyOrder(ExecutableStageMatcher.withInput("goImpulse.out").withNoOutputs().withTransforms("goRead", "flatten", "goParDo"), ExecutableStageMatcher.withInput("pyImpulse.out").withOutputs("flatten.out").withTransforms("pyRead", "flatten"), ExecutableStageMatcher.withInput("flatten.out").withNoOutputs().withTransforms("goParDo")));
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) Test(org.junit.Test)

Example 60 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class ImmutableExecutableStageTest method ofFullComponentsOnlyHasStagePTransforms.

@Test
public void ofFullComponentsOnlyHasStagePTransforms() throws Exception {
    Environment env = Environments.createDockerEnvironment("foo");
    PTransform pt = PTransform.newBuilder().putInputs("input", "input.out").putInputs("side_input", "sideInput.in").putInputs("timer", "timer.pc").putOutputs("output", "output.out").putOutputs("timer", "timer.pc").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.PAR_DO_TRANSFORM_URN).setPayload(ParDoPayload.newBuilder().setDoFn(RunnerApi.FunctionSpec.newBuilder()).putSideInputs("side_input", RunnerApi.SideInput.getDefaultInstance()).putStateSpecs("user_state", RunnerApi.StateSpec.getDefaultInstance()).putTimerFamilySpecs("timer", RunnerApi.TimerFamilySpec.getDefaultInstance()).build().toByteString())).build();
    PCollection input = PCollection.newBuilder().setUniqueName("input.out").build();
    PCollection sideInput = PCollection.newBuilder().setUniqueName("sideInput.in").build();
    PCollection timer = PCollection.newBuilder().setUniqueName("timer.pc").build();
    PCollection output = PCollection.newBuilder().setUniqueName("output.out").build();
    Components components = Components.newBuilder().putTransforms("pt", pt).putTransforms("other_pt", PTransform.newBuilder().setUniqueName("other").build()).putPcollections("input.out", input).putPcollections("sideInput.in", sideInput).putPcollections("timer.pc", timer).putPcollections("output.out", output).putEnvironments("foo", env).build();
    PTransformNode transformNode = PipelineNode.pTransform("pt", pt);
    SideInputReference sideInputRef = SideInputReference.of(transformNode, "side_input", PipelineNode.pCollection("sideInput.in", sideInput));
    UserStateReference userStateRef = UserStateReference.of(transformNode, "user_state", PipelineNode.pCollection("input.out", input));
    TimerReference timerRef = TimerReference.of(transformNode, "timer");
    ImmutableExecutableStage stage = ImmutableExecutableStage.ofFullComponents(components, env, PipelineNode.pCollection("input.out", input), Collections.singleton(sideInputRef), Collections.singleton(userStateRef), Collections.singleton(timerRef), Collections.singleton(PipelineNode.pTransform("pt", pt)), Collections.singleton(PipelineNode.pCollection("output.out", output)), DEFAULT_WIRE_CODER_SETTINGS);
    assertThat(stage.getComponents().containsTransforms("pt"), is(true));
    assertThat(stage.getComponents().containsTransforms("other_pt"), is(false));
    PTransform stagePTransform = stage.toPTransform("foo");
    assertThat(stagePTransform.getOutputsMap(), hasValue("output.out"));
    assertThat(stagePTransform.getOutputsCount(), equalTo(1));
    assertThat(stagePTransform.getInputsMap(), allOf(hasValue("input.out"), hasValue("sideInput.in")));
    assertThat(stagePTransform.getInputsCount(), equalTo(2));
    ExecutableStagePayload payload = ExecutableStagePayload.parseFrom(stagePTransform.getSpec().getPayload());
    assertThat(payload.getTransformsList(), contains("pt"));
    assertThat(ExecutableStage.fromPayload(payload), equalTo(stage));
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) ExecutableStagePayload(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload) PTransformNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Aggregations

Test (org.junit.Test)55 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)49 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)40 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)31 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)20 Map (java.util.Map)16 WindowedValue (org.apache.beam.sdk.util.WindowedValue)16 IOException (java.io.IOException)15 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)15 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)14 Coder (org.apache.beam.sdk.coders.Coder)14 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)13 Pipeline (org.apache.beam.sdk.Pipeline)13 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)12 FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec)11 KvCoder (org.apache.beam.sdk.coders.KvCoder)11 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)11 ArrayList (java.util.ArrayList)10 List (java.util.List)10 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)10