Search in sources :

Example 16 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class FusedPipelineTest method testToProto.

@Test
public void testToProto() {
    Pipeline p = Pipeline.create();
    p.apply("impulse", Impulse.create()).apply("map", MapElements.into(TypeDescriptors.integers()).via(bytes -> bytes.length)).apply("key", WithKeys.of("foo")).apply("gbk", GroupByKey.create()).apply("values", Values.create());
    RunnerApi.Pipeline protoPipeline = PipelineTranslation.toProto(p);
    checkState(protoPipeline.getRootTransformIdsList().containsAll(ImmutableList.of("impulse", "map", "key", "gbk", "values")), "Unexpected Root Transform IDs %s", protoPipeline.getRootTransformIdsList());
    FusedPipeline fused = GreedyPipelineFuser.fuse(protoPipeline);
    checkState(fused.getRunnerExecutedTransforms().size() == 2, "Unexpected number of runner transforms %s", fused.getRunnerExecutedTransforms());
    checkState(fused.getFusedStages().size() == 2, "Unexpected number of fused stages %s", fused.getFusedStages());
    RunnerApi.Pipeline fusedPipelineProto = fused.toPipeline();
    assertThat("Root Transforms should all be present in the Pipeline Components", fusedPipelineProto.getComponents().getTransformsMap().keySet(), hasItems(fusedPipelineProto.getRootTransformIdsList().toArray(new String[0])));
    assertThat("Should contain Impulse, GroupByKey, and two Environment Stages", fusedPipelineProto.getRootTransformIdsCount(), equalTo(4));
    assertThat(fusedPipelineProto.getRootTransformIdsList(), hasItems("impulse", "gbk"));
    assertRootsInTopologicalOrder(fusedPipelineProto);
    // Since MapElements, WithKeys, and Values are all composites of a ParDo, we do prefix matching
    // instead of looking at the inside of their expansions
    assertThat("Fused transforms should be present in the components", fusedPipelineProto.getComponents().getTransformsMap(), allOf(hasKey(startsWith("map")), hasKey(startsWith("key")), hasKey(startsWith("values"))));
    assertThat("Fused transforms shouldn't be present in the root IDs", fusedPipelineProto.getRootTransformIdsList(), not(hasItems(startsWith("map"), startsWith("key"), startsWith("values"))));
    // The other components should be those of the original pipeline.
    assertThat(fusedPipelineProto.getComponents().getCodersMap(), equalTo(protoPipeline.getComponents().getCodersMap()));
    assertThat(fusedPipelineProto.getComponents().getWindowingStrategiesMap(), equalTo(protoPipeline.getComponents().getWindowingStrategiesMap()));
    assertThat(fusedPipelineProto.getComponents().getEnvironmentsMap(), equalTo(protoPipeline.getComponents().getEnvironmentsMap()));
    assertThat(fusedPipelineProto.getComponents().getPcollectionsMap(), equalTo(protoPipeline.getComponents().getPcollectionsMap()));
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 17 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class ProcessBundleDescriptors method fromExecutableStageInternal.

private static ExecutableProcessBundleDescriptor fromExecutableStageInternal(String id, ExecutableStage stage, ApiServiceDescriptor dataEndpoint, @Nullable ApiServiceDescriptor stateEndpoint) throws IOException {
    // Create with all of the processing transforms, and all of the components.
    // TODO: Remove the unreachable subcomponents if the size of the descriptor matters.
    Map<String, PTransform> stageTransforms = stage.getTransforms().stream().collect(Collectors.toMap(PTransformNode::getId, PTransformNode::getTransform));
    Components.Builder components = stage.getComponents().toBuilder().clearTransforms().putAllTransforms(stageTransforms);
    ImmutableList.Builder<RemoteInputDestination> inputDestinationsBuilder = ImmutableList.builder();
    ImmutableMap.Builder<String, Coder> remoteOutputCodersBuilder = ImmutableMap.builder();
    WireCoderSetting wireCoderSetting = stage.getWireCoderSettings().stream().filter(ws -> ws.getInputOrOutputId().equals(stage.getInputPCollection().getId())).findAny().orElse(WireCoderSetting.getDefaultInstance());
    // The order of these does not matter.
    inputDestinationsBuilder.add(addStageInput(dataEndpoint, stage.getInputPCollection(), components, wireCoderSetting));
    remoteOutputCodersBuilder.putAll(addStageOutputs(dataEndpoint, stage.getOutputPCollections(), components, stage.getWireCoderSettings()));
    Map<String, Map<String, SideInputSpec>> sideInputSpecs = addSideInputs(stage, components);
    Map<String, Map<String, BagUserStateSpec>> bagUserStateSpecs = forBagUserStates(stage, components.build());
    Map<String, Map<String, TimerSpec>> timerSpecs = forTimerSpecs(stage, components);
    lengthPrefixAnyInputCoder(stage.getInputPCollection().getId(), components);
    // Copy data from components to ProcessBundleDescriptor.
    ProcessBundleDescriptor.Builder bundleDescriptorBuilder = ProcessBundleDescriptor.newBuilder().setId(id);
    if (stateEndpoint != null) {
        bundleDescriptorBuilder.setStateApiServiceDescriptor(stateEndpoint);
    }
    if (timerSpecs.size() > 0) {
        // By default use the data endpoint for timers, in the future considering enabling specifying
        // a different ApiServiceDescriptor for timers.
        bundleDescriptorBuilder.setTimerApiServiceDescriptor(dataEndpoint);
    }
    bundleDescriptorBuilder.putAllCoders(components.getCodersMap()).putAllEnvironments(components.getEnvironmentsMap()).putAllPcollections(components.getPcollectionsMap()).putAllWindowingStrategies(components.getWindowingStrategiesMap()).putAllTransforms(components.getTransformsMap());
    return ExecutableProcessBundleDescriptor.of(bundleDescriptorBuilder.build(), inputDestinationsBuilder.build(), remoteOutputCodersBuilder.build(), sideInputSpecs, bagUserStateSpecs, timerSpecs);
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) ByteStringCoder(org.apache.beam.runners.fnexecution.wire.ByteStringCoder) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) RemoteInputDestination(org.apache.beam.runners.fnexecution.data.RemoteInputDestination) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) WireCoderSetting(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.WireCoderSetting) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)

Example 18 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class ProcessBundleDescriptors method addSideInputs.

private static Map<String, Map<String, SideInputSpec>> addSideInputs(ExecutableStage stage, Components.Builder components) throws IOException {
    ImmutableTable.Builder<String, String, SideInputSpec> idsToSpec = ImmutableTable.builder();
    for (SideInputReference sideInputReference : stage.getSideInputs()) {
        // Update the coder specification for side inputs to be length prefixed so that the
        // SDK and Runner agree on how to encode/decode the key, window, and values for
        // side inputs.
        PCollectionNode pcNode = sideInputReference.collection();
        PCollection pc = pcNode.getPCollection();
        String lengthPrefixedCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder(pc.getCoderId(), components, false);
        components.putPcollections(pcNode.getId(), pc.toBuilder().setCoderId(lengthPrefixedCoderId).build());
        FullWindowedValueCoder<KV<?, ?>> coder = (FullWindowedValueCoder) WireCoders.instantiateRunnerWireCoder(pcNode, components.build());
        idsToSpec.put(sideInputReference.transform().getId(), sideInputReference.localName(), SideInputSpec.of(sideInputReference.transform().getId(), sideInputReference.localName(), getAccessPattern(sideInputReference), coder.getValueCoder(), coder.getWindowCoder()));
    }
    return idsToSpec.build().rowMap();
}
Also used : PCollection(org.apache.beam.model.pipeline.v1.RunnerApi.PCollection) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) SideInputReference(org.apache.beam.runners.core.construction.graph.SideInputReference) KV(org.apache.beam.sdk.values.KV) ImmutableTable(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableTable) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)

Example 19 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class ProcessBundleDescriptors method addStageOutputs.

private static Map<String, Coder<WindowedValue<?>>> addStageOutputs(ApiServiceDescriptor dataEndpoint, Collection<PCollectionNode> outputPCollections, Components.Builder components, Collection<WireCoderSetting> wireCoderSettings) throws IOException {
    Map<String, Coder<WindowedValue<?>>> remoteOutputCoders = new LinkedHashMap<>();
    for (PCollectionNode outputPCollection : outputPCollections) {
        WireCoderSetting wireCoderSetting = wireCoderSettings.stream().filter(ws -> ws.getInputOrOutputId().equals(outputPCollection.getId())).findAny().orElse(WireCoderSetting.getDefaultInstance());
        OutputEncoding outputEncoding = addStageOutput(dataEndpoint, components, outputPCollection, wireCoderSetting);
        remoteOutputCoders.put(outputEncoding.getPTransformId(), outputEncoding.getCoder());
    }
    return remoteOutputCoders;
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) ByteStringCoder(org.apache.beam.runners.fnexecution.wire.ByteStringCoder) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) WireCoderSetting(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.WireCoderSetting) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode) LinkedHashMap(java.util.LinkedHashMap)

Example 20 with Components

use of org.apache.beam.model.pipeline.v1.RunnerApi.Components in project beam by apache.

the class ProcessBundleDescriptors method addStageOutput.

private static OutputEncoding addStageOutput(ApiServiceDescriptor dataEndpoint, Components.Builder components, PCollectionNode outputPCollection, WireCoderSetting wireCoderSetting) throws IOException {
    String outputWireCoderId = WireCoders.addSdkWireCoder(outputPCollection, components, wireCoderSetting);
    @SuppressWarnings("unchecked") Coder<WindowedValue<?>> wireCoder = (Coder) WireCoders.instantiateRunnerWireCoder(outputPCollection, components.build(), wireCoderSetting);
    RemoteGrpcPort outputPort = RemoteGrpcPort.newBuilder().setApiServiceDescriptor(dataEndpoint).setCoderId(outputWireCoderId).build();
    RemoteGrpcPortWrite outputWrite = RemoteGrpcPortWrite.writeToPort(outputPCollection.getId(), outputPort);
    String outputId = uniqueId(String.format("fn/write/%s", outputPCollection.getId()), components::containsTransforms);
    PTransform outputTransform = outputWrite.toPTransform();
    components.putTransforms(outputId, outputTransform);
    return new AutoValue_ProcessBundleDescriptors_OutputEncoding(outputId, wireCoder);
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) ByteStringCoder(org.apache.beam.runners.fnexecution.wire.ByteStringCoder) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) RemoteGrpcPort(org.apache.beam.model.fnexecution.v1.BeamFnApi.RemoteGrpcPort) WindowedValue(org.apache.beam.sdk.util.WindowedValue) RemoteGrpcPortWrite(org.apache.beam.sdk.fn.data.RemoteGrpcPortWrite) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)

Aggregations

Test (org.junit.Test)55 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)49 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)40 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)31 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)20 Map (java.util.Map)16 WindowedValue (org.apache.beam.sdk.util.WindowedValue)16 IOException (java.io.IOException)15 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)15 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)14 Coder (org.apache.beam.sdk.coders.Coder)14 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)13 Pipeline (org.apache.beam.sdk.Pipeline)13 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)12 FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec)11 KvCoder (org.apache.beam.sdk.coders.KvCoder)11 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)11 ArrayList (java.util.ArrayList)10 List (java.util.List)10 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)10