
Example 61 with Pipeline

Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

The class FnHarness, method main.

@VisibleForTesting
public static void main(Function<String, String> environmentVarGetter) throws Exception {
    JvmInitializers.runOnStartup();
    System.out.format("SDK Fn Harness started%n");
    System.out.format("Harness ID %s%n", environmentVarGetter.apply(HARNESS_ID));
    System.out.format("Logging location %s%n", environmentVarGetter.apply(LOGGING_API_SERVICE_DESCRIPTOR));
    System.out.format("Control location %s%n", environmentVarGetter.apply(CONTROL_API_SERVICE_DESCRIPTOR));
    System.out.format("Status location %s%n", environmentVarGetter.apply(STATUS_API_SERVICE_DESCRIPTOR));
    System.out.format("Pipeline options %s%n", environmentVarGetter.apply(PIPELINE_OPTIONS));
    String id = environmentVarGetter.apply(HARNESS_ID);
    PipelineOptions options = PipelineOptionsTranslation.fromJson(environmentVarGetter.apply(PIPELINE_OPTIONS));
    Endpoints.ApiServiceDescriptor loggingApiServiceDescriptor = getApiServiceDescriptor(environmentVarGetter.apply(LOGGING_API_SERVICE_DESCRIPTOR));
    Endpoints.ApiServiceDescriptor controlApiServiceDescriptor = getApiServiceDescriptor(environmentVarGetter.apply(CONTROL_API_SERVICE_DESCRIPTOR));
    Endpoints.ApiServiceDescriptor statusApiServiceDescriptor = environmentVarGetter.apply(STATUS_API_SERVICE_DESCRIPTOR) == null ? null : getApiServiceDescriptor(environmentVarGetter.apply(STATUS_API_SERVICE_DESCRIPTOR));
    String runnerCapabilitesOrNull = environmentVarGetter.apply(RUNNER_CAPABILITIES);
    Set<String> runnerCapabilites = runnerCapabilitesOrNull == null ? Collections.emptySet() : ImmutableSet.copyOf(runnerCapabilitesOrNull.split("\\s+"));
    main(id, options, runnerCapabilites, loggingApiServiceDescriptor, controlApiServiceDescriptor, statusApiServiceDescriptor);
}
Also used : Endpoints(org.apache.beam.model.pipeline.v1.Endpoints) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)
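
The entry point above accepts any String-to-String lookup, so it can be driven by a plain Map instead of real environment variables. A minimal launcher sketch (not Beam code; the key strings, descriptor payloads, and options JSON below are assumptions, since their exact encodings are not shown in this snippet):

import java.util.HashMap;
import java.util.Map;
import org.apache.beam.fn.harness.FnHarness;

public class FnHarnessLauncherSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical configuration: the keys are assumed to match the HARNESS_ID,
        // *_API_SERVICE_DESCRIPTOR, and PIPELINE_OPTIONS constants referenced above;
        // the values are placeholders only.
        Map<String, String> env = new HashMap<>();
        env.put("HARNESS_ID", "harness-1");
        env.put("LOGGING_API_SERVICE_DESCRIPTOR", "url: \"localhost:50051\"");
        env.put("CONTROL_API_SERVICE_DESCRIPTOR", "url: \"localhost:50052\"");
        env.put("PIPELINE_OPTIONS", "{}");
        // STATUS_API_SERVICE_DESCRIPTOR and RUNNER_CAPABILITIES are left unset;
        // the harness code above tolerates null for both.
        FnHarness.main(env::get);
    }
}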

Example 62 with Pipeline

Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

The class PubSubWritePayloadTranslationTest, method testTranslateSinkWithTopicOverridden.

@Test
public void testTranslateSinkWithTopicOverridden() throws Exception {
    ValueProvider<TopicPath> runtimeProvider = pipeline.newProvider(TOPIC);
    PubsubUnboundedSink pubsubUnboundedSinkSink = new PubsubUnboundedSink(null, runtimeProvider, TIMESTAMP_ATTRIBUTE, ID_ATTRIBUTE, 0, 0, 0, Duration.ZERO, null);
    PubsubSink pubsubSink = new PubsubSink(pubsubUnboundedSinkSink);
    PCollection<byte[]> input = pipeline.apply(Create.of(new byte[0]));
    PDone output = input.apply(pubsubSink);
    AppliedPTransform<?, ?, PubsubSink> appliedPTransform = AppliedPTransform.of("sink", PValues.expandInput(input), PValues.expandOutput(output), pubsubSink, ResourceHints.create(), pipeline);
    SdkComponents components = SdkComponents.create();
    components.registerEnvironment(Environments.createDockerEnvironment("java"));
    RunnerApi.FunctionSpec spec = sinkTranslator.translate(appliedPTransform, components);
    assertEquals(PTransformTranslation.PUBSUB_WRITE, spec.getUrn());
    PubSubWritePayload payload = PubSubWritePayload.parseFrom(spec.getPayload());
    assertEquals(((NestedValueProvider) runtimeProvider).propertyName(), payload.getTopicRuntimeOverridden());
    assertTrue(payload.getTopic().isEmpty());
    assertEquals(TIMESTAMP_ATTRIBUTE, payload.getTimestampAttribute());
    assertEquals(ID_ATTRIBUTE, payload.getIdAttribute());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) TopicPath(org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.TopicPath) PDone(org.apache.beam.sdk.values.PDone) PubsubSink(org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSink.PubsubSink) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) PubSubWritePayload(org.apache.beam.model.pipeline.v1.RunnerApi.PubSubWritePayload) Test(org.junit.Test)

Example 63 with Pipeline

Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

The class PubSubWritePayloadTranslationTest, method testTranslateSinkWithTopic.

@Test
public void testTranslateSinkWithTopic() throws Exception {
    PubsubUnboundedSink pubsubUnboundedSink = new PubsubUnboundedSink(null, StaticValueProvider.of(TOPIC), TIMESTAMP_ATTRIBUTE, ID_ATTRIBUTE, 0, 0, 0, Duration.ZERO, null);
    PubsubUnboundedSink.PubsubSink pubsubSink = new PubsubSink(pubsubUnboundedSink);
    PCollection<byte[]> input = pipeline.apply(Create.of(new byte[0]));
    PDone output = input.apply(pubsubSink);
    AppliedPTransform<?, ?, PubsubSink> appliedPTransform = AppliedPTransform.of("sink", PValues.expandInput(input), PValues.expandOutput(output), pubsubSink, ResourceHints.create(), pipeline);
    SdkComponents components = SdkComponents.create();
    components.registerEnvironment(Environments.createDockerEnvironment("java"));
    RunnerApi.FunctionSpec spec = sinkTranslator.translate(appliedPTransform, components);
    assertEquals(PTransformTranslation.PUBSUB_WRITE, spec.getUrn());
    PubSubWritePayload payload = PubSubWritePayload.parseFrom(spec.getPayload());
    assertEquals(TOPIC.getFullPath(), payload.getTopic());
    assertTrue(payload.getTopicRuntimeOverridden().isEmpty());
    assertEquals(TIMESTAMP_ATTRIBUTE, payload.getTimestampAttribute());
    assertEquals(ID_ATTRIBUTE, payload.getIdAttribute());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PDone(org.apache.beam.sdk.values.PDone) PubsubSink(org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSink.PubsubSink) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) PubSubWritePayload(org.apache.beam.model.pipeline.v1.RunnerApi.PubSubWritePayload) Test(org.junit.Test)
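
Taken together, Examples 62 and 63 show the two shapes a translated PUBSUB_WRITE payload can take: a runtime-overridden topic recorded only by its ValueProvider property name, and a static topic recorded by its full path. A hypothetical helper (not part of Beam) sketching how a consumer of the spec might distinguish them:

import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.model.pipeline.v1.RunnerApi.PubSubWritePayload;
import org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException;

class PubsubTopicSpecInspector {
    // Hypothetical helper: summarizes which topic shape a PUBSUB_WRITE FunctionSpec carries.
    static String describeTopic(RunnerApi.FunctionSpec spec) throws InvalidProtocolBufferException {
        PubSubWritePayload payload = PubSubWritePayload.parseFrom(spec.getPayload());
        if (!payload.getTopicRuntimeOverridden().isEmpty()) {
            // Example 62: only the ValueProvider property name is known at translation time.
            return "runtime override via option: " + payload.getTopicRuntimeOverridden();
        }
        // Example 63: the full topic path was available when the sink was constructed.
        return "static topic: " + payload.getTopic();
    }
}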

Example 64 with Pipeline

Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

The class PubsubIOExternalTest, method testConstructPubsubWrite.

@Test
public void testConstructPubsubWrite() throws Exception {
    String topic = "projects/project-1234/topics/topic_name";
    String idAttribute = "id_foo";
    ExternalTransforms.ExternalConfigurationPayload payload = encodeRow(Row.withSchema(Schema.of(Field.of("topic", FieldType.STRING), Field.of("id_label", FieldType.STRING))).withFieldValue("topic", topic).withFieldValue("id_label", idAttribute).build());
    // Requirements are not passed as part of the expansion service, so validation would fail
    // for the pipeline we construct to expand the transform, since it now contains a transform
    // with a requirement.
    Pipeline p = Pipeline.create();
    p.apply("unbounded", Impulse.create()).setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
    String inputPCollection = Iterables.getOnlyElement(Iterables.getLast(pipelineProto.getComponents().getTransformsMap().values()).getOutputsMap().values());
    ExpansionApi.ExpansionRequest request = ExpansionApi.ExpansionRequest.newBuilder().setComponents(pipelineProto.getComponents()).setTransform(RunnerApi.PTransform.newBuilder().setUniqueName("test").putInputs("input", inputPCollection).setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn(ExternalWrite.URN).setPayload(payload.toByteString()))).setNamespace("test_namespace").build();
    ExpansionService expansionService = new ExpansionService();
    TestStreamObserver<ExpansionApi.ExpansionResponse> observer = new TestStreamObserver<>();
    expansionService.expand(request, observer);
    ExpansionApi.ExpansionResponse result = observer.result;
    RunnerApi.PTransform transform = result.getTransform();
    assertThat(transform.getSubtransformsList(), Matchers.hasItem(MatchesPattern.matchesPattern(".*MapElements.*")));
    assertThat(transform.getSubtransformsList(), Matchers.hasItem(MatchesPattern.matchesPattern(".*PubsubUnboundedSink.*")));
    assertThat(transform.getInputsCount(), Matchers.is(1));
    assertThat(transform.getOutputsCount(), Matchers.is(0));
    // test_namespacetest/PubsubUnboundedSink
    RunnerApi.PTransform writeComposite = result.getComponents().getTransformsOrThrow(transform.getSubtransforms(1));
    // test_namespacetest/PubsubUnboundedSink/PubsubSink
    RunnerApi.PTransform writeComposite2 = result.getComponents().getTransformsOrThrow(writeComposite.getSubtransforms(1));
    // test_namespacetest/PubsubUnboundedSink/PubsubSink/PubsubUnboundedSink.Writer
    RunnerApi.PTransform writeComposite3 = result.getComponents().getTransformsOrThrow(writeComposite2.getSubtransforms(3));
    // test_namespacetest/PubsubUnboundedSink/PubsubSink/PubsubUnboundedSink.Writer/ParMultiDo(Writer)
    RunnerApi.PTransform writeParDo = result.getComponents().getTransformsOrThrow(writeComposite3.getSubtransforms(0));
    RunnerApi.ParDoPayload parDoPayload = RunnerApi.ParDoPayload.parseFrom(writeParDo.getSpec().getPayload());
    DoFn<?, ?> pubsubWriter = ParDoTranslation.getDoFn(parDoPayload);
    String idAttributeActual = (String) Whitebox.getInternalState(pubsubWriter, "idAttribute");
    ValueProvider<PubsubClient.TopicPath> topicActual = (ValueProvider<PubsubClient.TopicPath>) Whitebox.getInternalState(pubsubWriter, "topic");
    assertThat(topicActual == null ? null : String.valueOf(topicActual), Matchers.is(topic));
    assertThat(idAttributeActual, Matchers.is(idAttribute));
}
Also used : ExpansionService(org.apache.beam.sdk.expansion.service.ExpansionService) ExternalTransforms(org.apache.beam.model.pipeline.v1.ExternalTransforms) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Pipeline(org.apache.beam.sdk.Pipeline) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) ExpansionApi(org.apache.beam.model.expansion.v1.ExpansionApi) ValueProvider(org.apache.beam.sdk.options.ValueProvider) Test(org.junit.Test)
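
The test relies on an encodeRow helper that is not shown in this snippet. A rough sketch of what such a helper could look like, assuming the SDK's RowCoder and SchemaTranslation utilities (the actual helper in the test may differ):

import java.io.IOException;
import org.apache.beam.model.pipeline.v1.ExternalTransforms;
import org.apache.beam.sdk.coders.RowCoder;
import org.apache.beam.sdk.schemas.SchemaTranslation;
import org.apache.beam.sdk.values.Row;
import org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString;

class ExternalConfigSketch {
    // Hypothetical stand-in for the encodeRow helper used above.
    static ExternalTransforms.ExternalConfigurationPayload encodeRow(Row row) {
        ByteString.Output out = ByteString.newOutput();
        try {
            // Encode the Row with its schema coder so the expansion service can decode it later.
            RowCoder.of(row.getSchema()).encode(row, out);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return ExternalTransforms.ExternalConfigurationPayload.newBuilder()
            .setSchema(SchemaTranslation.schemaToProto(row.getSchema(), true))
            .setPayload(out.toByteString())
            .build();
    }
}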

Example 65 with Pipeline

Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.

The class GreedyPCollectionFusers, method canFuseParDo.

/**
 * A ParDo can be fused into a stage if it executes in the same Environment as that stage, and no
 * transforms that are upstream of any of its side inputs are present in that stage.
 *
 * <p>A ParDo that consumes a side input cannot process an element until all of the side inputs
 * contain data for the side input window that contains the element.
 */
private static boolean canFuseParDo(PTransformNode parDo, Environment environment, PCollectionNode candidate, Collection<PCollectionNode> stagePCollections, QueryablePipeline pipeline) {
    Optional<Environment> env = pipeline.getEnvironment(parDo);
    checkArgument(env.isPresent(), "A %s must have an %s associated with it", ParDoPayload.class.getSimpleName(), Environment.class.getSimpleName());
    if (!env.get().equals(environment)) {
        // The ParDo executes in a different environment than this stage, so fusion
        // is never possible.
        return false;
    }
    try {
        ParDoPayload payload = ParDoPayload.parseFrom(parDo.getTransform().getSpec().getPayload());
        if (Maps.filterKeys(parDo.getTransform().getInputsMap(), s -> payload.getTimerFamilySpecsMap().containsKey(s)).values().contains(candidate.getId())) {
            // Allow fusion across timer PCollections because they are a self loop.
            return true;
        } else if (payload.getStateSpecsCount() > 0 || payload.getTimerFamilySpecsCount() > 0) {
            // Inputs to a ParDo that uses state or timers must be key-partitioned and processed
            // serially per key; rather than check whether the rest of the stage is
            // key-partitioned and preserves keys, these ParDos do not fuse into an existing stage.
            return false;
        } else if (!pipeline.getSideInputs(parDo).isEmpty()) {
            // A ParDo that consumes a side input cannot be fused into an
            // executable stage alongside any transforms which are upstream of any of its side inputs.
            return false;
        }
    } catch (InvalidProtocolBufferException e) {
        throw new IllegalArgumentException(e);
    }
    return true;
}
Also used : ParDoPayload(org.apache.beam.model.pipeline.v1.RunnerApi.ParDoPayload) InvalidProtocolBufferException(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.InvalidProtocolBufferException) Environment(org.apache.beam.model.pipeline.v1.RunnerApi.Environment)
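
canFuseParDo is one building block of greedy fusion; runners normally drive fusion over a whole pipeline proto. A hedged sketch of that top-level flow, assuming the GreedyPipelineFuser and ExecutableStage APIs from the runners-core-construction graph package:

import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.runners.core.construction.PipelineTranslation;
import org.apache.beam.runners.core.construction.graph.ExecutableStage;
import org.apache.beam.runners.core.construction.graph.FusedPipeline;
import org.apache.beam.runners.core.construction.graph.GreedyPipelineFuser;
import org.apache.beam.sdk.Pipeline;

class FusionSketch {
    // Sketch only: fuse an SDK pipeline's proto into executable stages and report
    // the environment each stage would run in.
    static void printStages(Pipeline pipeline) {
        RunnerApi.Pipeline proto = PipelineTranslation.toProto(pipeline);
        FusedPipeline fused = GreedyPipelineFuser.fuse(proto);
        for (ExecutableStage stage : fused.getFusedStages()) {
            System.out.println(
                stage.getInputPCollection().getId() + " -> environment " + stage.getEnvironment().getUrn());
        }
    }
}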

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi): 117
Test (org.junit.Test): 87
Pipeline (org.apache.beam.sdk.Pipeline): 82
SdkComponents (org.apache.beam.runners.core.construction.SdkComponents): 44
ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 43
DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions): 38
Map (java.util.Map): 32
KV (org.apache.beam.sdk.values.KV): 26
Job (com.google.api.services.dataflow.model.Job): 25
Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString): 24
KvCoder (org.apache.beam.sdk.coders.KvCoder): 24
Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components): 23
Coder (org.apache.beam.sdk.coders.Coder): 23
ArrayList (java.util.ArrayList): 22
WindowedValue (org.apache.beam.sdk.util.WindowedValue): 22
HashMap (java.util.HashMap): 20
List (java.util.List): 20
ExecutableStage (org.apache.beam.runners.core.construction.graph.ExecutableStage): 19
IOException (java.io.IOException): 18
PCollection (org.apache.beam.sdk.values.PCollection): 18