Search in sources :

Example 16 with PTransform

Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.

the class SdkHarnessClientTest method setup.

/**
 * Prepares the fixtures shared by the tests: initializes the Mockito-annotated fields, creates
 * the {@code sdkHarnessClient}, and assembles {@code descriptor}, a ProcessBundleDescriptor that
 * contains the user's "proc" ParDo between a gRPC read node and a gRPC write node.
 *
 * @throws Exception if translating the pipeline or the wire coder to proto fails
 */
@Before
public void setup() throws Exception {
    MockitoAnnotations.initMocks(this);
    sdkHarnessClient = SdkHarnessClient.usingFnApiClient(fnApiControlClient, dataService);

    // Minimal user pipeline: a Create feeding a ParDo registered under the name "proc".
    Pipeline pipeline = Pipeline.create();
    TupleTag<String> mainOutputTag = new TupleTag<>();
    pipeline
        .apply("create", Create.of("foo"))
        .apply("proc", ParDo.of(new TestFn()).withOutputTags(mainOutputTag, TupleTagList.empty()));
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);

    // Seed the descriptor with the pipeline's environments, windowing strategies and coders.
    ProcessBundleDescriptor.Builder descriptorBuilder = ProcessBundleDescriptor.newBuilder();
    descriptorBuilder.setId("my_id");
    descriptorBuilder.putAllEnvironments(pipelineProto.getComponents().getEnvironmentsMap());
    descriptorBuilder.putAllWindowingStrategies(pipelineProto.getComponents().getWindowingStrategiesMap());
    descriptorBuilder.putAllCoders(pipelineProto.getComponents().getCodersMap());

    // Register the coder used on the gRPC ports under the id "wire_coder".
    RunnerApi.Coder wireCoderProto =
        CoderTranslation.toProto(WindowedValue.getFullCoder(StringUtf8Coder.of(), Coder.INSTANCE))
            .getCoder();
    descriptorBuilder.putCoders("wire_coder", wireCoderProto);

    PTransform procTransform = pipelineProto.getComponents().getTransformsOrThrow("proc");

    // A single port definition is shared by the read and the write node.
    RemoteGrpcPort grpcPort =
        RemoteGrpcPort.newBuilder()
            .setApiServiceDescriptor(harness.dataEndpoint())
            .setCoderId("wire_coder")
            .build();
    RemoteGrpcPortRead grpcRead =
        RemoteGrpcPortRead.readFromPort(grpcPort, getOnlyElement(procTransform.getInputsMap().values()));
    RemoteGrpcPortWrite grpcWrite =
        RemoteGrpcPortWrite.writeToPort(getOnlyElement(procTransform.getOutputsMap().values()), grpcPort);

    // TODO: Ensure cross-env (Runner <-> SDK GRPC Read/Write Node) coders are length-prefixed
    // Copy every PCollection that "proc" consumes, then every one it produces.
    for (String inputId : procTransform.getInputsMap().values()) {
        descriptorBuilder.putPcollections(
            inputId, pipelineProto.getComponents().getPcollectionsOrThrow(inputId));
    }
    for (String outputId : procTransform.getOutputsMap().values()) {
        descriptorBuilder.putPcollections(
            outputId, pipelineProto.getComponents().getPcollectionsOrThrow(outputId));
    }

    descriptorBuilder.putTransforms("proc", procTransform);
    descriptorBuilder.putTransforms(SDK_GRPC_READ_TRANSFORM, grpcRead.toPTransform());
    descriptorBuilder.putTransforms(SDK_GRPC_WRITE_TRANSFORM, grpcWrite.toPTransform());
    descriptor = descriptorBuilder.build();
}
Also used : RemoteGrpcPort(org.apache.beam.model.fnexecution.v1.BeamFnApi.RemoteGrpcPort) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) TupleTag(org.apache.beam.sdk.values.TupleTag) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Pipeline(org.apache.beam.sdk.Pipeline) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) RemoteGrpcPortRead(org.apache.beam.sdk.fn.data.RemoteGrpcPortRead) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) RemoteGrpcPortWrite(org.apache.beam.sdk.fn.data.RemoteGrpcPortWrite) Before(org.junit.Before)

Example 17 with PTransform

Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.

the class ProcessBundleDescriptors method fromExecutableStageInternal.

/**
 * Assembles an {@link ExecutableProcessBundleDescriptor} for the given stage.
 *
 * <p>The stage's components are copied with their transforms replaced by the stage's own
 * transforms; the stage input, stage outputs, side inputs, bag user states and timers are then
 * registered against that copy before it is transferred into the bundle descriptor.
 *
 * @param id the id assigned to the resulting ProcessBundleDescriptor
 * @param stage the executable stage to describe
 * @param dataEndpoint the endpoint used for the stage input and outputs, and for timers when the
 *     stage has any timer specs
 * @param stateEndpoint the state endpoint; left unset on the descriptor when {@code null}
 * @return the descriptor together with its input destinations, output coders and specs
 * @throws IOException declared for the helper calls made while registering stage components
 */
private static ExecutableProcessBundleDescriptor fromExecutableStageInternal(
    String id,
    ExecutableStage stage,
    ApiServiceDescriptor dataEndpoint,
    @Nullable ApiServiceDescriptor stateEndpoint)
    throws IOException {
    // Start from all of the stage's components, keeping only the stage's processing transforms.
    // TODO: Remove the unreachable subcomponents if the size of the descriptor matters.
    Map<String, PTransform> transformsById =
        stage.getTransforms().stream()
            .collect(Collectors.toMap(PTransformNode::getId, PTransformNode::getTransform));
    Components.Builder componentsBuilder = stage.getComponents().toBuilder();
    componentsBuilder.clearTransforms();
    componentsBuilder.putAllTransforms(transformsById);

    ImmutableList.Builder<RemoteInputDestination> inputs = ImmutableList.builder();
    ImmutableMap.Builder<String, Coder> outputCoders = ImmutableMap.builder();

    // Use the wire coder setting that targets the stage input, or the default instance if none does.
    String stageInputId = stage.getInputPCollection().getId();
    WireCoderSetting inputWireCoderSetting =
        stage.getWireCoderSettings().stream()
            .filter(setting -> setting.getInputOrOutputId().equals(stageInputId))
            .findAny()
            .orElse(WireCoderSetting.getDefaultInstance());

    // The order of these does not matter.
    inputs.add(
        addStageInput(dataEndpoint, stage.getInputPCollection(), componentsBuilder, inputWireCoderSetting));
    outputCoders.putAll(
        addStageOutputs(
            dataEndpoint, stage.getOutputPCollections(), componentsBuilder, stage.getWireCoderSettings()));
    Map<String, Map<String, SideInputSpec>> sideInputs = addSideInputs(stage, componentsBuilder);
    Map<String, Map<String, BagUserStateSpec>> bagUserStates =
        forBagUserStates(stage, componentsBuilder.build());
    Map<String, Map<String, TimerSpec>> timers = forTimerSpecs(stage, componentsBuilder);
    lengthPrefixAnyInputCoder(stage.getInputPCollection().getId(), componentsBuilder);

    // Copy data from components to ProcessBundleDescriptor.
    ProcessBundleDescriptor.Builder descriptorBuilder = ProcessBundleDescriptor.newBuilder();
    descriptorBuilder.setId(id);
    if (stateEndpoint != null) {
        descriptorBuilder.setStateApiServiceDescriptor(stateEndpoint);
    }
    if (!timers.isEmpty()) {
        // By default use the data endpoint for timers, in the future considering enabling specifying
        // a different ApiServiceDescriptor for timers.
        descriptorBuilder.setTimerApiServiceDescriptor(dataEndpoint);
    }
    descriptorBuilder.putAllCoders(componentsBuilder.getCodersMap());
    descriptorBuilder.putAllEnvironments(componentsBuilder.getEnvironmentsMap());
    descriptorBuilder.putAllPcollections(componentsBuilder.getPcollectionsMap());
    descriptorBuilder.putAllWindowingStrategies(componentsBuilder.getWindowingStrategiesMap());
    descriptorBuilder.putAllTransforms(componentsBuilder.getTransformsMap());

    return ExecutableProcessBundleDescriptor.of(
        descriptorBuilder.build(),
        inputs.build(),
        outputCoders.build(),
        sideInputs,
        bagUserStates,
        timers);
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) ByteStringCoder(org.apache.beam.runners.fnexecution.wire.ByteStringCoder) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) RemoteInputDestination(org.apache.beam.runners.fnexecution.data.RemoteInputDestination) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) ProcessBundleDescriptor(org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor) WireCoderSetting(org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload.WireCoderSetting) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) RehydratedComponents(org.apache.beam.runners.core.construction.RehydratedComponents) Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)

Example 18 with PTransform

Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.

the class ProcessBundleDescriptors method addStageOutput.

/**
 * Adds a gRPC write transform for one stage output to {@code components} and reports how that
 * output is encoded.
 *
 * @param dataEndpoint the endpoint set on the remote gRPC port of the write transform
 * @param components the components builder that receives the SDK wire coder and the new transform
 * @param outputPCollection the stage output being written
 * @param wireCoderSetting the wire coder setting passed through to {@code WireCoders}
 * @return the id of the added write transform paired with the runner-side wire coder
 * @throws IOException declared for the {@code WireCoders} calls
 */
private static OutputEncoding addStageOutput(
    ApiServiceDescriptor dataEndpoint,
    Components.Builder components,
    PCollectionNode outputPCollection,
    WireCoderSetting wireCoderSetting)
    throws IOException {
    // The SDK wire coder has to be registered first: the runner coder below is instantiated
    // from a snapshot of the components taken after this registration.
    String sdkWireCoderId =
        WireCoders.addSdkWireCoder(outputPCollection, components, wireCoderSetting);
    @SuppressWarnings("unchecked")
    Coder<WindowedValue<?>> runnerWireCoder =
        (Coder)
            WireCoders.instantiateRunnerWireCoder(
                outputPCollection, components.build(), wireCoderSetting);

    String pcollectionId = outputPCollection.getId();
    RemoteGrpcPort.Builder portBuilder = RemoteGrpcPort.newBuilder();
    portBuilder.setApiServiceDescriptor(dataEndpoint);
    portBuilder.setCoderId(sdkWireCoderId);
    RemoteGrpcPortWrite grpcWrite =
        RemoteGrpcPortWrite.writeToPort(pcollectionId, portBuilder.build());

    // Choose a transform id that is not already present in the components.
    String writeTransformId =
        uniqueId(String.format("fn/write/%s", pcollectionId), components::containsTransforms);
    components.putTransforms(writeTransformId, grpcWrite.toPTransform());

    return new AutoValue_ProcessBundleDescriptors_OutputEncoding(writeTransformId, runnerWireCoder);
}
Also used : Coder(org.apache.beam.sdk.coders.Coder) ByteStringCoder(org.apache.beam.runners.fnexecution.wire.ByteStringCoder) FullWindowedValueCoder(org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder) RemoteGrpcPort(org.apache.beam.model.fnexecution.v1.BeamFnApi.RemoteGrpcPort) WindowedValue(org.apache.beam.sdk.util.WindowedValue) RemoteGrpcPortWrite(org.apache.beam.sdk.fn.data.RemoteGrpcPortWrite) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)

Example 19 with PTransform

Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.

the class AssignWindowsRunnerTest method factoryCreatesFromJavaWindowFn.

/**
 * Builds an assign-windows PTransform whose payload carries a {@code TestWindowFn}, obtains the
 * function for it from {@code factory}, and checks that applying the function replaces the
 * element's window with the one {@code TestWindowFn#assignWindow} yields for the element's
 * timestamp while value, timestamp and pane stay the same.
 */
@Test
public void factoryCreatesFromJavaWindowFn() throws Exception {
    SdkComponents sdkComponents = SdkComponents.create();
    sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

    // Assemble the transform proto from the inside out: payload, then spec, then transform.
    WindowIntoPayload windowIntoPayload =
        WindowIntoPayload.newBuilder()
            .setWindowFn(WindowingStrategyTranslation.toProto(new TestWindowFn(), sdkComponents))
            .build();
    FunctionSpec assignWindowsSpec =
        FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
            .setPayload(windowIntoPayload.toByteString())
            .build();
    PTransform windowPTransform =
        PTransform.newBuilder()
            .putInputs("in", "input")
            .putOutputs("out", "output")
            .setSpec(assignWindowsSpec)
            .build();

    ThrowingFunction<WindowedValue<?>, WindowedValue<?>> fn =
        (ThrowingFunction) factory.forPTransform("transform", windowPTransform);

    // The element arrives in an arbitrary interval window.
    IntervalWindow incomingWindow = new IntervalWindow(new Instant(0L), new Instant(20027L));
    WindowedValue<?> output =
        fn.apply(
            WindowedValue.of(
                22L, new Instant(5), incomingWindow, PaneInfo.ON_TIME_AND_ONLY_FIRING));

    assertThat(
        output,
        equalTo(
            WindowedValue.of(
                22L,
                new Instant(5),
                new TestWindowFn().assignWindow(new Instant(5)),
                PaneInfo.ON_TIME_AND_ONLY_FIRING)));
}
Also used : ThrowingFunction(org.apache.beam.sdk.function.ThrowingFunction) WindowedValue(org.apache.beam.sdk.util.WindowedValue) Instant(org.joda.time.Instant) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Example 20 with PTransform

Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in project beam by apache.

the class AssignWindowsRunnerTest method factoryCreatesFromKnownWindowFn.

/**
 * Builds an assign-windows PTransform whose payload carries a {@code Sessions} window fn with a
 * 12 minute gap, obtains the function for it from {@code factory}, and checks that applying the
 * function puts the element into an interval window that starts at the element's timestamp and
 * lasts 12 minutes, with value, timestamp and pane unchanged.
 */
@Test
public void factoryCreatesFromKnownWindowFn() throws Exception {
    SdkComponents sdkComponents = SdkComponents.create();
    sdkComponents.registerEnvironment(Environments.createDockerEnvironment("java"));

    // Assemble the transform proto from the inside out: payload, then spec, then transform.
    WindowIntoPayload windowIntoPayload =
        WindowIntoPayload.newBuilder()
            .setWindowFn(
                WindowingStrategyTranslation.toProto(
                    Sessions.withGapDuration(Duration.standardMinutes(12L)), sdkComponents))
            .build();
    FunctionSpec assignWindowsSpec =
        FunctionSpec.newBuilder()
            .setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN)
            .setPayload(windowIntoPayload.toByteString())
            .build();
    PTransform windowPTransform =
        PTransform.newBuilder()
            .putInputs("in", "input")
            .putOutputs("out", "output")
            .setSpec(assignWindowsSpec)
            .build();

    ThrowingFunction<WindowedValue<?>, WindowedValue<?>> fn =
        (ThrowingFunction) factory.forPTransform("transform", windowPTransform);

    // The element arrives in an arbitrary interval window.
    IntervalWindow incomingWindow = new IntervalWindow(new Instant(0L), new Instant(20027L));

    assertThat(
        fn.apply(
            WindowedValue.of(
                22L, new Instant(5), incomingWindow, PaneInfo.ON_TIME_AND_ONLY_FIRING)),
        equalTo(
            WindowedValue.of(
                22L,
                new Instant(5),
                new IntervalWindow(new Instant(5L), Duration.standardMinutes(12L)),
                PaneInfo.ON_TIME_AND_ONLY_FIRING)));
}
Also used : ThrowingFunction(org.apache.beam.sdk.function.ThrowingFunction) WindowedValue(org.apache.beam.sdk.util.WindowedValue) Instant(org.joda.time.Instant) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) PTransform(org.apache.beam.model.pipeline.v1.RunnerApi.PTransform) Test(org.junit.Test)

Aggregations

PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform): 58, Test (org.junit.Test): 34, Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components): 22, PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode): 20, PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode): 19, PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection): 18, Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment): 16, Map (java.util.Map): 14, RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi): 10, Collection (java.util.Collection): 7, Pipeline (org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline): 7, ArrayList (java.util.ArrayList): 6, HashSet (java.util.HashSet): 6, Collectors (java.util.stream.Collectors): 6, DeduplicationResult (org.apache.beam.runners.core.construction.graph.OutputDeduplicator.DeduplicationResult): 6, LinkedHashSet (java.util.LinkedHashSet): 5, RemoteGrpcPort (org.apache.beam.model.fnexecution.v1.BeamFnApi.RemoteGrpcPort): 5, ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList): 5, ExecutableStagePayload (org.apache.beam.model.pipeline.v1.RunnerApi.ExecutableStagePayload): 4, FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec): 4