Search in sources :

Example 1 with CombinePayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.CombinePayload in project beam by apache.

the class PipelineValidator method validateCombine.

/**
 * Validates a Combine transform: parses its spec payload as a {@link CombinePayload} and checks
 * that the accumulator coder id it names is present in {@code components}.
 *
 * @param id identifier of the transform, used only in the failure message
 * @param transform transform whose spec payload is parsed
 * @param components pipeline components in which the coder id is looked up
 * @param requirements not read by this method
 * @throws Exception if the payload cannot be parsed
 */
private static void validateCombine(String id, PTransform transform, Components components, Set<String> requirements) throws Exception {
    CombinePayload combinePayload = CombinePayload.parseFrom(transform.getSpec().getPayload());
    String accumulatorCoderId = combinePayload.getAccumulatorCoderId();
    boolean coderIsKnown = components.containsCoders(accumulatorCoderId);
    checkArgument(coderIsKnown, "Transform %s uses unknown accumulator coder id %s", id, accumulatorCoderId);
}
Also used : CombinePayload(org.apache.beam.model.pipeline.v1.RunnerApi.CombinePayload)

Example 2 with CombinePayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.CombinePayload in project beam by apache.

the class PipelineTranslationTest method getAccumulatorCoder.

/**
 * Reads the accumulator coder id from the transform's {@link CombinePayload} and rebuilds the
 * corresponding coder from the components registered while translating the transform.
 *
 * @param transform applied transform whose combine payload is inspected
 * @return the coder rehydrated from the accumulator coder id
 * @throws IOException if no combine payload is available for the transform
 */
private static Coder<?> getAccumulatorCoder(AppliedPTransform<?, ?, ?> transform) throws IOException {
    SdkComponents sdkComponents = SdkComponents.create(transform.getPipeline().getOptions());
    String accumulatorCoderId =
        getCombinePayload(transform, sdkComponents)
            .map(payload -> payload.getAccumulatorCoderId())
            .orElseThrow(() -> new IOException("Transform does not contain an AccumulatorCoder"));
    // Snapshot the components only after the payload lookup has had the chance to register them.
    Components componentsProto = sdkComponents.toComponents();
    return CoderTranslation.fromProto(
        componentsProto.getCodersOrThrow(accumulatorCoderId),
        RehydratedComponents.forComponents(componentsProto),
        TranslationContext.DEFAULT);
}
Also used : Components(org.apache.beam.model.pipeline.v1.RunnerApi.Components) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) IOException(java.io.IOException)

Example 3 with CombinePayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.CombinePayload in project beam by apache.

the class CreateExecutableStageNodeFunction method transformCombineValuesFnToFunctionSpec.

/**
 * Builds an Apache Beam {@link RunnerApi.FunctionSpec} for a CombineValuesFn
 * {@link ParDoInstruction}.
 *
 * <p>The composite CombinePerKey transform named by the user function is looked up in the
 * pipeline, its {@link RunnerApi.CombinePayload} is parsed, and the payload is re-emitted under
 * the URN that corresponds to the requested combine phase.
 *
 * @param userFn cloud object carrying the transform id and, optionally, the combine phase
 * @return a builder whose URN matches the phase and whose payload is the combine payload
 */
private RunnerApi.FunctionSpec.Builder transformCombineValuesFnToFunctionSpec(CloudObject userFn) {
    // The serialized-fn property holds the id of the composite CombinePerKey PTransform, which is
    // where the CombinePayload lives.
    String transformId = getString(userFn, PropertyNames.SERIALIZED_FN);
    RunnerApi.PTransform perKeyTransform = pipeline.getComponents().getTransformsOrDefault(transformId, null);
    checkArgument(perKeyTransform != null, "Transform with id \"%s\" not found in pipeline.", transformId);

    String actualUrn = perKeyTransform.getSpec().getUrn();
    checkArgument(
        actualUrn.equals(COMBINE_PER_KEY_URN),
        "Found transform \"%s\" for Combine instruction, but that transform had unexpected URN \"%s\" (expected \"%s\")",
        perKeyTransform,
        actualUrn,
        COMBINE_PER_KEY_URN);

    RunnerApi.CombinePayload payload;
    try {
        payload = RunnerApi.CombinePayload.parseFrom(perKeyTransform.getSpec().getPayload());
    } catch (InvalidProtocolBufferException parseFailure) {
        throw new RuntimeException("Combine did not have a CombinePayload", parseFailure);
    }

    // When no phase is recorded on the instruction, the whole combine (ALL) is assumed.
    String phase = getString(userFn, WorkerPropertyNames.PHASE, CombinePhase.ALL);
    final String phaseUrn;
    if (phase.equals(CombinePhase.ALL)) {
        phaseUrn = COMBINE_GROUPED_VALUES_URN;
    } else if (phase.equals(CombinePhase.ADD)) {
        phaseUrn = COMBINE_PRECOMBINE_URN;
    } else if (phase.equals(CombinePhase.MERGE)) {
        phaseUrn = COMBINE_MERGE_URN;
    } else if (phase.equals(CombinePhase.EXTRACT)) {
        phaseUrn = COMBINE_EXTRACT_URN;
    } else {
        throw new RuntimeException("Encountered unknown Combine Phase: " + phase);
    }

    RunnerApi.FunctionSpec.Builder specBuilder = RunnerApi.FunctionSpec.newBuilder();
    specBuilder.setUrn(phaseUrn);
    return specBuilder.setPayload(payload.toByteString());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)

Example 4 with CombinePayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.CombinePayload in project beam by apache.

the class CombineTranslationTest method testToFromProto.

/**
 * Applies a global combine, captures the single {@code Combine.PerKey} composite found while
 * traversing the pipeline, translates it to a {@link CombinePayload}, and checks that both the
 * accumulator coder and the combine function can be recovered from the proto.
 */
@Test
public void testToFromProto() throws Exception {
    PCollection<Integer> ints = pipeline.apply(Create.of(1, 2, 3));
    ints.apply(Combine.globally(combineFn));

    final AtomicReference<AppliedPTransform<?, ?, Combine.PerKey<?, ?, ?>>> perKeyCombine = new AtomicReference<>();
    pipeline.traverseTopologically(new PipelineVisitor.Defaults() {

        @Override
        public void leaveCompositeTransform(Node node) {
            if (!(node.getTransform() instanceof Combine.PerKey)) {
                return;
            }
            // Exactly one PerKey composite is expected; a second one is a test setup error.
            checkState(perKeyCombine.get() == null);
            perKeyCombine.set((AppliedPTransform) node.toAppliedPTransform(getPipeline()));
        }
    });
    checkState(perKeyCombine.get() != null);

    SdkComponents sdkComponents = SdkComponents.create();
    CombinePayload payloadProto = CombineTranslation.toProto(perKeyCombine.get(), sdkComponents);
    RunnerApi.Components translatedComponents = sdkComponents.toComponents();

    assertEquals(
        combineFn.getAccumulatorCoder(pipeline.getCoderRegistry(), ints.getCoder()),
        CombineTranslation.getAccumulatorCoder(payloadProto, translatedComponents));
    assertEquals(
        combineFn,
        CombineTranslation.getCombineFn(payloadProto));
}
Also used : Combine(org.apache.beam.sdk.transforms.Combine) Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) AtomicReference(java.util.concurrent.atomic.AtomicReference) RunnerApi(org.apache.beam.sdk.common.runner.v1.RunnerApi) CombinePayload(org.apache.beam.sdk.common.runner.v1.RunnerApi.CombinePayload) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) PipelineVisitor(org.apache.beam.sdk.Pipeline.PipelineVisitor) Test(org.junit.Test)

Example 5 with CombinePayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.CombinePayload in project beam by apache.

the class CombineRunners method createCombineGroupedValuesMapFunction.

/**
 * Creates a function that maps a key with its grouped input values to the same key paired with
 * the result of applying the combine function to those values.
 *
 * <p>The combine function is deserialized from the payload of the transform's
 * {@link CombinePayload}.
 *
 * @param pTransformId not read by this method
 * @param pTransform transform whose spec payload holds the {@link CombinePayload}
 * @return a function producing {@code KV.of(key, combineFn.apply(values))} for each input
 * @throws IOException declared for payload parsing failures
 */
static <KeyT, InputT, AccumT, OutputT> ThrowingFunction<KV<KeyT, Iterable<InputT>>, KV<KeyT, OutputT>> createCombineGroupedValuesMapFunction(String pTransformId, PTransform pTransform) throws IOException {
    CombinePayload payload = CombinePayload.parseFrom(pTransform.getSpec().getPayload());
    byte[] serializedCombineFn = payload.getCombineFn().getPayload().toByteArray();
    CombineFn<InputT, AccumT, OutputT> fn = (CombineFn) SerializableUtils.deserializeFromByteArray(serializedCombineFn, "CombineFn");
    return (KV<KeyT, Iterable<InputT>> keyedValues) -> KV.of(keyedValues.getKey(), fn.apply(keyedValues.getValue()));
}
Also used : CombinePayload(org.apache.beam.model.pipeline.v1.RunnerApi.CombinePayload) CombineFn(org.apache.beam.sdk.transforms.Combine.CombineFn)

Aggregations

CombinePayload (org.apache.beam.model.pipeline.v1.RunnerApi.CombinePayload): 5; CombineFn (org.apache.beam.sdk.transforms.Combine.CombineFn): 4; ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 3; RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi): 2; Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString): 2; InvalidProtocolBufferException (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException): 2; IOException (java.io.IOException): 1; AtomicReference (java.util.concurrent.atomic.AtomicReference): 1; Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components): 1; PipelineVisitor (org.apache.beam.sdk.Pipeline.PipelineVisitor): 1; RunnerApi (org.apache.beam.sdk.common.runner.v1.RunnerApi): 1; CombinePayload (org.apache.beam.sdk.common.runner.v1.RunnerApi.CombinePayload): 1; AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform): 1; Node (org.apache.beam.sdk.runners.TransformHierarchy.Node): 1; Combine (org.apache.beam.sdk.transforms.Combine): 1; Test (org.junit.Test): 1