Search in sources :

Example 11 with ParDoPayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload in project beam by apache.

the class KafkaIOExternalTest method testConstructKafkaWrite.

/**
 * Sends an expansion request for the external Kafka write transform to an {@link ExpansionService}
 * and verifies that the writer DoFn found in the expanded transform carries the topic, producer
 * configuration and serializer classes supplied in the request payload.
 */
@Test
public void testConstructKafkaWrite() throws Exception {
    String topic = "topic";
    String keySerializer = "org.apache.kafka.common.serialization.ByteArraySerializer";
    String valueSerializer = "org.apache.kafka.common.serialization.LongSerializer";
    ImmutableMap<String, String> producerConfig =
        ImmutableMap.<String, String>builder()
            .put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "server1:port,server2:port")
            .put("retries", "3")
            .build();

    // The write configuration is expressed as a Row and encoded into the request payload.
    ExternalTransforms.ExternalConfigurationPayload payload =
        encodeRow(
            Row.withSchema(
                    Schema.of(
                        Field.of("topic", FieldType.STRING),
                        Field.of("producer_config", FieldType.map(FieldType.STRING, FieldType.STRING)),
                        Field.of("key_serializer", FieldType.STRING),
                        Field.of("value_serializer", FieldType.STRING)))
                .withFieldValue("topic", topic)
                .withFieldValue("producer_config", producerConfig)
                .withFieldValue("key_serializer", keySerializer)
                .withFieldValue("value_serializer", valueSerializer)
                .build());

    // Build a small upstream pipeline; the single output of its last transform becomes the input
    // of the transform being expanded.
    Pipeline p = Pipeline.create();
    p.apply(Impulse.create()).apply(WithKeys.of("key"));
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
    String inputPCollection =
        Iterables.getOnlyElement(
            Iterables.getLast(pipelineProto.getComponents().getTransformsMap().values())
                .getOutputsMap()
                .values());

    ExpansionApi.ExpansionRequest request =
        ExpansionApi.ExpansionRequest.newBuilder()
            .setComponents(pipelineProto.getComponents())
            .setTransform(
                RunnerApi.PTransform.newBuilder()
                    .setUniqueName("test")
                    .putInputs("input", inputPCollection)
                    .setSpec(
                        RunnerApi.FunctionSpec.newBuilder()
                            .setUrn(org.apache.beam.sdk.io.kafka.KafkaIO.Write.External.URN)
                            .setPayload(payload.toByteString())))
            .setNamespace("test_namespace")
            .build();

    ExpansionService expansionService = new ExpansionService();
    TestStreamObserver<ExpansionApi.ExpansionResponse> observer = new TestStreamObserver<>();
    expansionService.expand(request, observer);
    ExpansionApi.ExpansionResponse result = observer.result;

    RunnerApi.PTransform transform = result.getTransform();
    assertThat(transform.getSubtransformsList(), Matchers.hasItem(MatchesPattern.matchesPattern(".*Kafka-ProducerRecord.*")));
    assertThat(transform.getSubtransformsList(), Matchers.hasItem(MatchesPattern.matchesPattern(".*KafkaIO-WriteRecords.*")));
    assertThat(transform.getInputsCount(), Matchers.is(1));
    assertThat(transform.getOutputsCount(), Matchers.is(0));

    // NOTE(review): index 1 is assumed to be the KafkaIO-WriteRecords composite; the assertions
    // above only establish that such a subtransform exists, not its position.
    RunnerApi.PTransform writeComposite = result.getComponents().getTransformsOrThrow(transform.getSubtransforms(1));
    // Descend two more levels (first child, then its first child) to reach the transform whose
    // payload is parsed as a ParDoPayload below.
    RunnerApi.PTransform writeParDo =
        result
            .getComponents()
            .getTransformsOrThrow(
                result
                    .getComponents()
                    .getTransformsOrThrow(writeComposite.getSubtransforms(0))
                    .getSubtransforms(0));
    RunnerApi.ParDoPayload parDoPayload = RunnerApi.ParDoPayload.parseFrom(writeParDo.getSpec().getPayload());

    // Wildcard-parameterised types instead of raw types keep generic checking enabled.
    DoFn<?, ?> kafkaWriter = ParDoTranslation.getDoFn(parDoPayload);
    assertThat(kafkaWriter, Matchers.instanceOf(KafkaWriter.class));
    // The writer's configuration is held in its private "spec" field, read here reflectively.
    KafkaIO.WriteRecords<?, ?> spec = (KafkaIO.WriteRecords<?, ?>) Whitebox.getInternalState(kafkaWriter, "spec");
    assertThat(spec.getProducerConfig(), Matchers.is(producerConfig));
    assertThat(spec.getTopic(), Matchers.is(topic));
    assertThat(spec.getKeySerializer().getName(), Matchers.is(keySerializer));
    assertThat(spec.getValueSerializer().getName(), Matchers.is(valueSerializer));
}
Also used : ExpansionService(org.apache.beam.sdk.expansion.service.ExpansionService) ExternalTransforms(org.apache.beam.model.pipeline.v1.ExternalTransforms) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Pipeline(org.apache.beam.sdk.Pipeline) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) ExpansionApi(org.apache.beam.model.expansion.v1.ExpansionApi) ExternalConfigurationPayload(org.apache.beam.model.pipeline.v1.ExternalTransforms.ExternalConfigurationPayload) Test(org.junit.Test)

Example 12 with ParDoPayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload in project beam by apache.

the class PubsubIOExternalTest method testConstructPubsubWrite.

/**
 * Expands the external Pubsub write transform through an {@link ExpansionService} and checks that
 * the writer DoFn located inside the expanded transform holds the topic and id attribute that were
 * supplied in the request payload.
 */
@Test
public void testConstructPubsubWrite() throws Exception {
    String topic = "projects/project-1234/topics/topic_name";
    String idAttribute = "id_foo";

    // Configuration row (topic + id label) encoded as the request payload.
    ExternalTransforms.ExternalConfigurationPayload configPayload =
        encodeRow(
            Row.withSchema(
                    Schema.of(
                        Field.of("topic", FieldType.STRING),
                        Field.of("id_label", FieldType.STRING)))
                .withFieldValue("topic", topic)
                .withFieldValue("id_label", idAttribute)
                .build());

    // Requirements are not passed as part of the expansion service so the validation
    // fails because of how we construct the pipeline to expand the transform since it now
    // has a transform with a requirement.
    Pipeline pipeline = Pipeline.create();
    pipeline.apply("unbounded", Impulse.create()).setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);

    // The only output of the last translated transform feeds the transform under expansion.
    String inputPCollection =
        Iterables.getOnlyElement(
            Iterables.getLast(pipelineProto.getComponents().getTransformsMap().values())
                .getOutputsMap()
                .values());

    ExpansionApi.ExpansionRequest request =
        ExpansionApi.ExpansionRequest.newBuilder()
            .setComponents(pipelineProto.getComponents())
            .setTransform(
                RunnerApi.PTransform.newBuilder()
                    .setUniqueName("test")
                    .putInputs("input", inputPCollection)
                    .setSpec(
                        RunnerApi.FunctionSpec.newBuilder()
                            .setUrn(ExternalWrite.URN)
                            .setPayload(configPayload.toByteString())))
            .setNamespace("test_namespace")
            .build();

    ExpansionService service = new ExpansionService();
    TestStreamObserver<ExpansionApi.ExpansionResponse> responseObserver = new TestStreamObserver<>();
    service.expand(request, responseObserver);
    ExpansionApi.ExpansionResponse response = responseObserver.result;

    RunnerApi.PTransform expanded = response.getTransform();
    assertThat(expanded.getSubtransformsList(), Matchers.hasItem(MatchesPattern.matchesPattern(".*MapElements.*")));
    assertThat(expanded.getSubtransformsList(), Matchers.hasItem(MatchesPattern.matchesPattern(".*PubsubUnboundedSink.*")));
    assertThat(expanded.getInputsCount(), Matchers.is(1));
    assertThat(expanded.getOutputsCount(), Matchers.is(0));

    // Walk down the expanded hierarchy one level at a time.
    // test_namespacetest/PubsubUnboundedSink
    RunnerApi.PTransform sink = response.getComponents().getTransformsOrThrow(expanded.getSubtransforms(1));
    // test_namespacetest/PubsubUnboundedSink/PubsubSink
    RunnerApi.PTransform pubsubSink = response.getComponents().getTransformsOrThrow(sink.getSubtransforms(1));
    // test_namespacetest/PubsubUnboundedSink/PubsubSink/PubsubUnboundedSink.Writer
    RunnerApi.PTransform writer = response.getComponents().getTransformsOrThrow(pubsubSink.getSubtransforms(3));
    // test_namespacetest/PubsubUnboundedSink/PubsubSink/PubsubUnboundedSink.Writer/ParMultiDo(Writer)
    RunnerApi.PTransform writeParDo = response.getComponents().getTransformsOrThrow(writer.getSubtransforms(0));

    RunnerApi.ParDoPayload parDoPayload = RunnerApi.ParDoPayload.parseFrom(writeParDo.getSpec().getPayload());
    DoFn<?, ?> pubsubWriter = ParDoTranslation.getDoFn(parDoPayload);

    // Both values live in private fields of the writer and are read reflectively.
    String idAttributeActual = (String) Whitebox.getInternalState(pubsubWriter, "idAttribute");
    ValueProvider<PubsubClient.TopicPath> topicActual = (ValueProvider<PubsubClient.TopicPath>) Whitebox.getInternalState(pubsubWriter, "topic");

    // Compare on the string form of the provider; a missing provider is compared as null.
    String topicActualText;
    if (topicActual == null) {
        topicActualText = null;
    } else {
        topicActualText = String.valueOf(topicActual);
    }
    assertThat(topicActualText, Matchers.is(topic));
    assertThat(idAttributeActual, Matchers.is(idAttribute));
}
Also used : ExpansionService(org.apache.beam.sdk.expansion.service.ExpansionService) ExternalTransforms(org.apache.beam.model.pipeline.v1.ExternalTransforms) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Pipeline(org.apache.beam.sdk.Pipeline) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) ExpansionApi(org.apache.beam.model.expansion.v1.ExpansionApi) ValueProvider(org.apache.beam.sdk.options.ValueProvider) Test(org.junit.Test)

Example 13 with ParDoPayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload in project beam by apache.

the class GreedyPCollectionFusers method canFuseParDo.

/**
 * Decides whether a ParDo may be fused into a stage.
 *
 * <p>Fusion requires that the ParDo executes in the same {@link Environment} as the stage. Beyond
 * that, a ParDo is fused when the candidate PCollection is one of its timer-family inputs;
 * otherwise it is not fused if it declares state or timer families, or if it consumes any side
 * input.
 *
 * <p>A ParDo that consumes a side input cannot process an element until all of its side inputs
 * contain data for the side input window that contains the element.
 *
 * <p>The {@code stagePCollections} argument is not read by this method.
 *
 * @throws IllegalArgumentException if the ParDo has no associated environment or its payload
 *     cannot be parsed as a {@link ParDoPayload}
 */
private static boolean canFuseParDo(PTransformNode parDo, Environment environment, PCollectionNode candidate, Collection<PCollectionNode> stagePCollections, QueryablePipeline pipeline) {
    Optional<Environment> parDoEnvironment = pipeline.getEnvironment(parDo);
    checkArgument(parDoEnvironment.isPresent(), "A %s must have an %s associated with it", ParDoPayload.class.getSimpleName(), Environment.class.getSimpleName());

    // The ParDo runs in a different environment than the stage, so fusion is never possible.
    if (!parDoEnvironment.get().equals(environment)) {
        return false;
    }

    // Parsing is the only step that can raise InvalidProtocolBufferException.
    final ParDoPayload payload;
    try {
        payload = ParDoPayload.parseFrom(parDo.getTransform().getSpec().getPayload());
    } catch (InvalidProtocolBufferException e) {
        throw new IllegalArgumentException(e);
    }

    // PCollection ids of those inputs whose local name is also a timer family name.
    Collection<String> timerInputIds =
        Maps.filterKeys(
                parDo.getTransform().getInputsMap(),
                inputName -> payload.getTimerFamilySpecsMap().containsKey(inputName))
            .values();
    if (timerInputIds.contains(candidate.getId())) {
        // Allow fusion across timer PCollections because they are a self loop.
        return true;
    }

    // ParDos that declare user state or timer families do not fuse into an existing stage;
    // presumably this avoids having to check that the rest of the stage is key-partitioned and
    // preserves keys.
    boolean usesStateOrTimers = payload.getStateSpecsCount() > 0 || payload.getTimerFamilySpecsCount() > 0;
    if (usesStateOrTimers) {
        return false;
    }

    // A ParDo that consumes side inputs is not fused into an executable stage alongside
    // transforms which are upstream of any of its side inputs, so fuse only when it has none.
    return pipeline.getSideInputs(parDo).isEmpty();
}
Also used : ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment)

Example 14 with ParDoPayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload in project beam by apache.

the class ParDoTranslation method getAdditionalOutputTags.

/**
 * Returns the additional (non-main) output tags of the given ParDo application.
 *
 * <p>When the applied transform is a {@link ParDo.MultiOutput}, its own additional output tags
 * are returned directly. Otherwise the application is translated to its proto form and the result
 * contains one tag per proto output key other than the main output tag's id.
 *
 * @param application the applied transform to inspect
 * @return the list of additional output tags
 * @throws IOException if translating the application or parsing its payload fails
 */
public static TupleTagList getAdditionalOutputTags(AppliedPTransform<?, ?, ?> application) throws IOException {
    PTransform<?, ?> appliedTransform = application.getTransform();
    if (appliedTransform instanceof ParDo.MultiOutput) {
        ParDo.MultiOutput<?, ?> multiOutput = (ParDo.MultiOutput<?, ?>) appliedTransform;
        return multiOutput.getAdditionalOutputTags();
    }

    // Fall back to the portable representation of the transform.
    SdkComponents sdkComponents = SdkComponents.create(application.getPipeline().getOptions());
    RunnerApi.PTransform proto = PTransformTranslation.toProto(application, sdkComponents);
    ParDoPayload parDoPayload = ParDoPayload.parseFrom(proto.getSpec().getPayload());

    // Every output key except the main output's id names an additional output.
    String mainOutputId = getMainOutputTag(parDoPayload).getId();
    Set<String> additionalIds = Sets.difference(proto.getOutputsMap().keySet(), Collections.singleton(mainOutputId));

    ArrayList<TupleTag<?>> tags = new ArrayList<>();
    additionalIds.forEach(id -> tags.add(new TupleTag<>(id)));
    return TupleTagList.of(tags);
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) ArrayList(java.util.ArrayList) TupleTag(org.apache.beam.sdk.values.TupleTag) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) MultiOutput(org.apache.beam.sdk.transforms.ParDo.MultiOutput)

Example 15 with ParDoPayload

use of org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload in project beam by apache.

the class ParDoTranslation method getSideInputs.

/**
 * Returns the side input views consumed by the given ParDo application.
 *
 * <p>When the applied transform is a {@link ParDo.MultiOutput}, the views are copied from its own
 * side input map. Otherwise the application is translated to its proto form and one view is
 * rebuilt for each side input entry of the payload, using the application input registered under
 * the same tag.
 *
 * @param application the applied transform to inspect
 * @return the side input views of the ParDo
 * @throws IOException if translating the application, parsing its payload, or rebuilding a view
 *     fails
 * @throws NullPointerException if a side input tag in the payload has no matching application
 *     input
 */
public static List<PCollectionView<?>> getSideInputs(AppliedPTransform<?, ?, ?> application) throws IOException {
    PTransform<?, ?> appliedTransform = application.getTransform();
    if (appliedTransform instanceof ParDo.MultiOutput) {
        ParDo.MultiOutput<?, ?> multiOutput = (ParDo.MultiOutput<?, ?>) appliedTransform;
        return multiOutput.getSideInputs().values().stream().collect(Collectors.toList());
    }

    // Fall back to the portable representation; the same SdkComponents instance is reused below
    // to rehydrate the components referenced by the translated transform.
    SdkComponents sdkComponents = SdkComponents.create(application.getPipeline().getOptions());
    RunnerApi.PTransform proto = PTransformTranslation.toProto(application, sdkComponents);
    ParDoPayload parDoPayload = ParDoPayload.parseFrom(proto.getSpec().getPayload());

    List<PCollectionView<?>> result = new ArrayList<>();
    RehydratedComponents rehydrated = RehydratedComponents.forComponents(sdkComponents.toComponents());
    for (Map.Entry<String, SideInput> entry : parDoPayload.getSideInputsMap().entrySet()) {
        String tag = entry.getKey();
        RunnerApi.SideInput sideInputProto = entry.getValue();
        // The side input's PCollection is looked up among the application inputs by its tag.
        PCollection<?> source =
            checkNotNull(
                (PCollection<?>) application.getInputs().get(new TupleTag<>(tag)),
                "no input with tag %s",
                tag);
        result.add(PCollectionViewTranslation.viewFromProto(sideInputProto, tag, source, proto, rehydrated));
    }
    return result;
}
Also used : ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) SideInput(org.apache.beam.model.pipeline.v1.RunnerApi.SideInput) ArrayList(java.util.ArrayList) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PCollectionView(org.apache.beam.sdk.values.PCollectionView) ParDo(org.apache.beam.sdk.transforms.ParDo) Map(java.util.Map) HashMap(java.util.HashMap) MultiOutput(org.apache.beam.sdk.transforms.ParDo.MultiOutput) SideInput(org.apache.beam.model.pipeline.v1.RunnerApi.SideInput)

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)13 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)11 ParDoPayload (org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload)10 Map (java.util.Map)8 ArrayList (java.util.ArrayList)6 Pipeline (org.apache.beam.sdk.Pipeline)6 PCollectionView (org.apache.beam.sdk.values.PCollectionView)6 Test (org.junit.Test)6 IOException (java.io.IOException)5 HashMap (java.util.HashMap)5 InvalidProtocolBufferException (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException)5 DoFn (org.apache.beam.sdk.transforms.DoFn)4 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)4 ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction)3 ExpansionApi (org.apache.beam.model.expansion.v1.ExpansionApi)3 ProcessBundleDescriptor (org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor)3 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)3 FunctionSpec (org.apache.beam.model.pipeline.v1.RunnerApi.FunctionSpec)3 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)3 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)3