
Example 41 with PTransform

Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in the apache/beam project.

Source: class RegisterAndProcessBundleOperation, method extractCrossBoundaryGrpcPCollectionNames.

private Set<String> extractCrossBoundaryGrpcPCollectionNames(final Set<Entry<String, PTransform>> ptransforms) {
    Set<String> result = new HashSet<>();
    // GRPC Read/Write expected to only have one Output/Input respectively.
    for (Map.Entry<String, RunnerApi.PTransform> pTransform : ptransforms) {
        if (pTransform.getValue().getSpec().getUrn().equals(RemoteGrpcPortRead.URN)) {
            String grpcReadTransformOutputName = Iterables.getOnlyElement(pTransform.getValue().getOutputsMap().keySet());
            String pcollectionName = pTransform.getValue().getOutputsMap().get(grpcReadTransformOutputName);
            result.add(pcollectionName);
        }
        if (pTransform.getValue().getSpec().getUrn().equals(RemoteGrpcPortWrite.URN)) {
            String grpcTransformInputName = Iterables.getOnlyElement(pTransform.getValue().getInputsMap().keySet());
            String pcollectionName = pTransform.getValue().getInputsMap().get(grpcTransformInputName);
            result.add(pcollectionName);
        }
    }
    return result;
}
Also used: ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString), ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap), Map (java.util.Map), ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap), HashSet (java.util.HashSet), PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)
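The method above only inspects proto structure, so its behavior can be illustrated with a hand-built transform map. A minimal sketch follows, assuming RemoteGrpcPortRead lives in org.apache.beam.sdk.fn.data and using hypothetical ids ("grpc-read", "pc-1"):

import java.util.Map;
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.sdk.fn.data.RemoteGrpcPortRead;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap;

class CrossBoundarySketch {
    static Map<String, RunnerApi.PTransform> exampleTransforms() {
        // A gRPC read transform: the spec URN marks it, and its single output entry
        // maps a local name ("out") to the PCollection id ("pc-1") that crosses the
        // SDK/runner boundary. That id is what the method collects.
        RunnerApi.PTransform grpcRead =
            RunnerApi.PTransform.newBuilder()
                .setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(RemoteGrpcPortRead.URN))
                .putOutputs("out", "pc-1")
                .build();
        return ImmutableMap.of("grpc-read", grpcRead);
    }
    // extractCrossBoundaryGrpcPCollectionNames(exampleTransforms().entrySet())
    // would return the singleton set {"pc-1"}.
}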

Example 42 with PTransform

Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in the apache/beam project.

Source: class CreateExecutableStageNodeFunction, method transformCombineValuesFnToFunctionSpec.

/**
 * Transforms a CombineValuesFn {@link ParDoInstruction} to an Apache Beam {@link
 * RunnerApi.FunctionSpec}.
 */
private RunnerApi.FunctionSpec.Builder transformCombineValuesFnToFunctionSpec(CloudObject userFn) {
    // Grab the Combine PTransform. This transform is the composite PTransform representing the
    // entire CombinePerKey, and it contains the CombinePayload we need.
    String combinePTransformId = getString(userFn, PropertyNames.SERIALIZED_FN);
    RunnerApi.PTransform combinePerKeyPTransform = pipeline.getComponents().getTransformsOrDefault(combinePTransformId, null);
    checkArgument(combinePerKeyPTransform != null, "Transform with id \"%s\" not found in pipeline.", combinePTransformId);
    checkArgument(combinePerKeyPTransform.getSpec().getUrn().equals(COMBINE_PER_KEY_URN), "Found transform \"%s\" for Combine instruction, " + "but that transform had unexpected URN \"%s\" (expected \"%s\")", combinePerKeyPTransform, combinePerKeyPTransform.getSpec().getUrn(), COMBINE_PER_KEY_URN);
    RunnerApi.CombinePayload combinePayload;
    try {
        combinePayload = RunnerApi.CombinePayload.parseFrom(combinePerKeyPTransform.getSpec().getPayload());
    } catch (InvalidProtocolBufferException exc) {
        throw new RuntimeException("Combine did not have a CombinePayload", exc);
    }
    String phase = getString(userFn, WorkerPropertyNames.PHASE, CombinePhase.ALL);
    String urn;
    switch(phase) {
        case CombinePhase.ALL:
            urn = COMBINE_GROUPED_VALUES_URN;
            break;
        case CombinePhase.ADD:
            urn = COMBINE_PRECOMBINE_URN;
            break;
        case CombinePhase.MERGE:
            urn = COMBINE_MERGE_URN;
            break;
        case CombinePhase.EXTRACT:
            urn = COMBINE_EXTRACT_URN;
            break;
        default:
            throw new RuntimeException("Encountered unknown Combine Phase: " + phase);
    }
    return RunnerApi.FunctionSpec.newBuilder().setUrn(urn).setPayload(combinePayload.toByteString());
}
Also used: RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi), InvalidProtocolBufferException (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException), Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString), ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)
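The lookup-and-parse step at the top of the method can be exercised in isolation. A minimal sketch, assuming COMBINE_PER_KEY_URN resolves to the standard "beam:transform:combine_per_key:v1" URN and using a hypothetical combine-fn URN:

import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException;

class CombinePayloadSketch {
    static RunnerApi.CombinePayload roundTrip() throws InvalidProtocolBufferException {
        // The composite CombinePerKey transform carries its CombinePayload as the
        // serialized payload of its FunctionSpec.
        RunnerApi.CombinePayload payload =
            RunnerApi.CombinePayload.newBuilder()
                .setCombineFn(RunnerApi.FunctionSpec.newBuilder()
                    .setUrn("urn:example:combine_fn")) // hypothetical combine-fn URN
                .build();
        RunnerApi.PTransform combinePerKey =
            RunnerApi.PTransform.newBuilder()
                .setSpec(RunnerApi.FunctionSpec.newBuilder()
                    .setUrn("beam:transform:combine_per_key:v1") // assumed value of COMBINE_PER_KEY_URN
                    .setPayload(payload.toByteString()))
                .build();
        // transformCombineValuesFnToFunctionSpec performs this same parse, then re-emits
        // the payload under a phase-specific URN (grouped values, precombine, merge, extract).
        return RunnerApi.CombinePayload.parseFrom(combinePerKey.getSpec().getPayload());
    }
}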

Example 43 with PTransform

Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in the apache/beam project.

Source: class RegisterNodeFunction, method transformSideInputForSdk.

/**
 * Modifies the process bundle descriptor and updates the PTransform that the SDK harness will see
 * with length prefixed coders used on the side input PCollection and windowing strategy.
 */
private static final void transformSideInputForSdk(RunnerApi.Pipeline pipeline, RunnerApi.PTransform originalPTransform, String sideInputTag, ProcessBundleDescriptor.Builder processBundleDescriptor, RunnerApi.PTransform.Builder updatedPTransform) {
    RunnerApi.PCollection sideInputPCollection = pipeline.getComponents().getPcollectionsOrThrow(originalPTransform.getInputsOrThrow(sideInputTag));
    RunnerApi.WindowingStrategy sideInputWindowingStrategy = pipeline.getComponents().getWindowingStrategiesOrThrow(sideInputPCollection.getWindowingStrategyId());
    // TODO: We should not length prefix the window or key for the SDK side since the
    // key and window are already length delimited via protobuf itself. But we need to
    // maintain the length prefixing within the Runner harness to match the bytes that were
    // materialized to the side input sink.
    // We take the original pipeline coders and add any coders we have added when processing side
    // inputs before building new length prefixed variants.
    RunnerApi.Components.Builder componentsBuilder = pipeline.getComponents().toBuilder();
    componentsBuilder.putAllCoders(processBundleDescriptor.getCodersMap());
    String updatedSdkSideInputCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder(sideInputPCollection.getCoderId(), componentsBuilder, false);
    String updatedSdkSideInputWindowCoderId = LengthPrefixUnknownCoders.addLengthPrefixedCoder(sideInputWindowingStrategy.getWindowCoderId(), componentsBuilder, false);
    processBundleDescriptor.putAllCoders(componentsBuilder.getCodersMap());
    String updatedSdkWindowingStrategyId = SyntheticComponents.uniqueId(sideInputPCollection.getWindowingStrategyId() + "-runner_generated", processBundleDescriptor.getWindowingStrategiesMap().keySet()::contains);
    processBundleDescriptor.putWindowingStrategies(updatedSdkWindowingStrategyId, sideInputWindowingStrategy.toBuilder().setWindowCoderId(updatedSdkSideInputWindowCoderId).build());
    RunnerApi.PCollection updatedSdkSideInputPcollection = sideInputPCollection.toBuilder().setCoderId(updatedSdkSideInputCoderId).setWindowingStrategyId(updatedSdkWindowingStrategyId).build();
    // Replace the contents of the PCollection with the updated side input PCollection
    // specification and insert it into the updated PTransform.
    processBundleDescriptor.putPcollections(originalPTransform.getInputsOrThrow(sideInputTag), updatedSdkSideInputPcollection);
    updatedPTransform.putInputs(sideInputTag, originalPTransform.getInputsOrThrow(sideInputTag));
}
Also used: SdkComponents (org.apache.beam.runners.core.construction.SdkComponents), SyntheticComponents (org.apache.beam.runners.core.construction.SyntheticComponents), RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi), Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString), ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)
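The coder rewrite in the middle of the method can be shown on its own. A minimal sketch, assuming an import path for LengthPrefixUnknownCoders (the snippet above does not show it) and using a hypothetical coder id and URN:

import org.apache.beam.model.pipeline.v1.RunnerApi;
// Assumed import path for LengthPrefixUnknownCoders; adjust to the package used in your Beam version.
import org.apache.beam.runners.fnexecution.wire.LengthPrefixUnknownCoders;

class LengthPrefixSketch {
    static String lengthPrefix() {
        // Register a coder the runner cannot understand under a hypothetical URN.
        RunnerApi.Components.Builder components =
            RunnerApi.Components.newBuilder()
                .putCoders("custom-coder",
                    RunnerApi.Coder.newBuilder()
                        .setSpec(RunnerApi.FunctionSpec.newBuilder()
                            .setUrn("urn:example:custom_coder")) // hypothetical coder URN
                        .build());
        // Mirrors the calls in transformSideInputForSdk: passing false requests a
        // length-prefixed wrapper around the original coder rather than replacing the
        // unknown coder with a raw bytes coder.
        return LengthPrefixUnknownCoders.addLengthPrefixedCoder("custom-coder", components, false);
    }
}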

Example 44 with PTransform

Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in the apache/beam project.

Source: class WindowMappingFnRunnerTest, method testWindowMapping.

@Test
public void testWindowMapping() throws Exception {
    String pTransformId = "pTransformId";
    SdkComponents components = SdkComponents.create();
    components.registerEnvironment(Environments.createDockerEnvironment("java"));
    RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder().setUrn(WindowMappingFnRunner.URN).setPayload(ParDoTranslation.translateWindowMappingFn(new GlobalWindows().getDefaultWindowMappingFn(), components).toByteString()).build();
    RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder().setSpec(functionSpec).build();
    ThrowingFunction<KV<Object, BoundedWindow>, KV<Object, BoundedWindow>> mapFunction = WindowMappingFnRunner.createMapFunctionForPTransform(pTransformId, pTransform);
    KV<Object, BoundedWindow> input = KV.of("abc", new IntervalWindow(Instant.now(), Duration.standardMinutes(1)));
    assertEquals(KV.of(input.getKey(), GlobalWindow.INSTANCE), mapFunction.apply(input));
}
Also used: RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi), GlobalWindows (org.apache.beam.sdk.transforms.windowing.GlobalWindows), BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow), KV (org.apache.beam.sdk.values.KV), SdkComponents (org.apache.beam.runners.core.construction.SdkComponents), IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow), Test (org.junit.Test)
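What the URN-wrapped payload in this test encodes can also be observed directly on the SDK objects, without the harness plumbing. A small sketch using the same window values as the test:

import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.apache.beam.sdk.transforms.windowing.WindowMappingFn;
import org.joda.time.Duration;
import org.joda.time.Instant;

class WindowMappingSketch {
    static BoundedWindow mapToSideInputWindow() {
        // GlobalWindows' default window mapping fn sends every main-input window to the
        // single global window, which is why the test expects GlobalWindow.INSTANCE for
        // the IntervalWindow input.
        WindowMappingFn<GlobalWindow> mappingFn = new GlobalWindows().getDefaultWindowMappingFn();
        IntervalWindow mainInputWindow =
            new IntervalWindow(Instant.now(), Duration.standardMinutes(1));
        return mappingFn.getSideInputWindow(mainInputWindow);
    }
}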

Example 45 with PTransform

Use of org.apache.beam.model.pipeline.v1.RunnerApi.PTransform in the apache/beam project.

Source: class AssignWindowsRunnerTest, method multipleInputWindowsAsMapFnSucceeds.

@Test
public void multipleInputWindowsAsMapFnSucceeds() throws Exception {
    WindowFn<Object, BoundedWindow> windowFn = new WindowFn<Object, BoundedWindow>() {

        @Override
        public Collection<BoundedWindow> assignWindows(AssignContext c) {
            c.window();
            return ImmutableSet.of(GlobalWindow.INSTANCE, new IntervalWindow(new Instant(-500), Duration.standardMinutes(3)));
        }

        @Override
        public void mergeWindows(MergeContext c) {
            throw new UnsupportedOperationException();
        }

        @Override
        public WindowMappingFn<BoundedWindow> getDefaultWindowMappingFn() {
            throw new UnsupportedOperationException();
        }

        @Override
        public boolean isCompatible(WindowFn<?, ?> other) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Coder<BoundedWindow> windowCoder() {
            throw new UnsupportedOperationException();
        }
    };
    SdkComponents components = SdkComponents.create();
    components.registerEnvironment(Environments.createDockerEnvironment("java"));
    RunnerApi.PCollection pCollection = RunnerApi.PCollection.newBuilder().setUniqueName("input").setCoderId("coder-id").build();
    RunnerApi.Coder coder = CoderTranslation.toProto(VarIntCoder.of()).getCoder();
    PTransformRunnerFactoryTestContext context = PTransformRunnerFactoryTestContext.builder("ptransform", PTransform.newBuilder().putInputs("in", "input").putOutputs("out", "output").setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN).setPayload(WindowIntoPayload.newBuilder().setWindowFn(WindowingStrategyTranslation.toProto(windowFn, components)).build().toByteString())).build()).pCollections(Collections.singletonMap("input", pCollection)).coders(Collections.singletonMap("coder-id", coder)).build();
    Collection<WindowedValue<?>> outputs = new ArrayList<>();
    context.addPCollectionConsumer("output", outputs::add, VarIntCoder.of());
    MapFnRunners.forWindowedValueMapFnFactory(new AssignWindowsMapFnFactory<>()).createRunnerForPTransform(context);
    WindowedValue<Integer> value = WindowedValue.of(2, new Instant(-10L), ImmutableList.of(new IntervalWindow(new Instant(-22L), Duration.standardMinutes(5L)), new IntervalWindow(new Instant(-120000L), Duration.standardMinutes(3L))), PaneInfo.ON_TIME_AND_ONLY_FIRING);
    context.getPCollectionConsumer("input").accept(value);
    assertThat(outputs, containsInAnyOrder(WindowedValue.of(2, new Instant(-10L), ImmutableSet.of(GlobalWindow.INSTANCE, new IntervalWindow(new Instant(-500), Duration.standardMinutes(3))), PaneInfo.ON_TIME_AND_ONLY_FIRING), WindowedValue.of(2, new Instant(-10L), ImmutableSet.of(GlobalWindow.INSTANCE, new IntervalWindow(new Instant(-500), Duration.standardMinutes(3))), PaneInfo.ON_TIME_AND_ONLY_FIRING)));
}
Also used: WindowFn (org.apache.beam.sdk.transforms.windowing.WindowFn), PartitioningWindowFn (org.apache.beam.sdk.transforms.windowing.PartitioningWindowFn), Instant (org.joda.time.Instant), ArrayList (java.util.ArrayList), SdkComponents (org.apache.beam.runners.core.construction.SdkComponents), RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi), WindowedValue (org.apache.beam.sdk.util.WindowedValue), AssignWindowsMapFnFactory (org.apache.beam.fn.harness.AssignWindowsRunner.AssignWindowsMapFnFactory), BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow), IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow), Test (org.junit.Test)
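Why the assertion expects two identical outputs is easy to miss in the long test body. A short restatement, reusing the test's own input values, with the reasoning in comments:

import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList;
import org.joda.time.Duration;
import org.joda.time.Instant;

class AssignWindowsSketch {
    static WindowedValue<Integer> twoWindowInput() {
        // The input element carries two IntervalWindows. The assign-windows map fn is
        // applied once per input window, and this particular WindowFn ignores the incoming
        // window and always returns the same two-window set, so the runner emits two
        // identical WindowedValues for the one element.
        return WindowedValue.of(
            2,
            new Instant(-10L),
            ImmutableList.of(
                new IntervalWindow(new Instant(-22L), Duration.standardMinutes(5L)),
                new IntervalWindow(new Instant(-120000L), Duration.standardMinutes(3L))),
            PaneInfo.ON_TIME_AND_ONLY_FIRING);
    }
}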

Aggregations

PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform): 58
Test (org.junit.Test): 41
RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi): 28
Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components): 23
PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode): 22
PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode): 21
PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection): 19
Map (java.util.Map): 18
Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment): 18
ArrayList (java.util.ArrayList): 16
ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 16
WindowedValue (org.apache.beam.sdk.util.WindowedValue): 11
Collection (java.util.Collection): 9
SdkComponents (org.apache.beam.runners.core.construction.SdkComponents): 9
Collectors (java.util.stream.Collectors): 8
ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap): 8
IOException (java.io.IOException): 7
HashSet (java.util.HashSet): 7
FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec): 7
Pipeline (org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline): 7