
Example 26 with PCollection

Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

From the class DataflowPipelineTranslatorTest, method testStreamingSplittableParDoTranslation.

/**
 * Smoke test to fail fast if translation of a splittable ParDo in streaming breaks.
 */
@Test
public void testStreamingSplittableParDoTranslation() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    options.setStreaming(true);
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    Pipeline pipeline = Pipeline.create(options);
    PCollection<String> windowedInput = pipeline.apply(Create.of("a")).apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))));
    windowedInput.apply(ParDo.of(new TestSplittableFn()));
    runner.replaceV1Transforms(pipeline);
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
    Job job = translator.translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList()).getJob();
    // The job should contain a SplittableParDo.ProcessKeyedElements step, translated as
    // "SplittableProcessKeyed".
    List<Step> steps = job.getSteps();
    Step processKeyedStep = null;
    for (Step step : steps) {
        if ("SplittableProcessKeyed".equals(step.getKind())) {
            assertNull(processKeyedStep);
            processKeyedStep = step;
        }
    }
    assertNotNull(processKeyedStep);
    @SuppressWarnings({"unchecked", "rawtypes"})
    DoFnInfo<String, Integer> fnInfo =
        (DoFnInfo<String, Integer>)
            SerializableUtils.deserializeFromByteArray(
                jsonStringToByteArray(
                    getString(processKeyedStep.getProperties(), PropertyNames.SERIALIZED_FN)),
                "DoFnInfo");
    assertThat(fnInfo.getDoFn(), instanceOf(TestSplittableFn.class));
    assertThat(
        fnInfo.getWindowingStrategy().getWindowFn(),
        Matchers.<WindowFn>equalTo(FixedWindows.of(Duration.standardMinutes(1))));
    assertThat(fnInfo.getInputCoder(), instanceOf(StringUtf8Coder.class));
    Coder<?> restrictionCoder =
        CloudObjects.coderFromCloudObject(
            (CloudObject)
                Structs.getObject(processKeyedStep.getProperties(), PropertyNames.RESTRICTION_CODER));
    assertEquals(
        KvCoder.of(SerializableCoder.of(OffsetRange.class), VoidCoder.of()), restrictionCoder);
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) DoFnInfo(org.apache.beam.sdk.util.DoFnInfo) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Step(com.google.api.services.dataflow.model.Step) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Pipeline(org.apache.beam.sdk.Pipeline) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) Job(com.google.api.services.dataflow.model.Job) Test(org.junit.Test)
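
The TestSplittableFn used above is defined elsewhere in the test class and is not shown here. Purely as an illustration, a minimal splittable DoFn matching the shape the assertions imply (String input, Integer output, an OffsetRange restriction that the SDK can encode with SerializableCoder) might look like the sketch below; the class name MinimalSplittableFn and its body are assumptions, not the actual test fixture.

import org.apache.beam.sdk.io.range.OffsetRange;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;

// Hypothetical stand-in for TestSplittableFn: a splittable DoFn<String, Integer>
// whose restriction is an OffsetRange over the element's characters.
@DoFn.BoundedPerElement
class MinimalSplittableFn extends DoFn<String, Integer> {

    @GetInitialRestriction
    public OffsetRange getInitialRestriction(@Element String element) {
        // One claimable position per character of the input element.
        return new OffsetRange(0, element.length());
    }

    @ProcessElement
    public void process(ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) {
        // Claim each offset in the current restriction before emitting output for it.
        for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
            c.output((int) i);
        }
    }
}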

Example 27 with PCollection

Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

From the class DataflowPipelineTranslatorTest, method runStreamingGroupIntoBatchesAndGetJobSpec.

private JobSpecification runStreamingGroupIntoBatchesAndGetJobSpec(Boolean withShardedKey, List<String> experiments) throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setExperiments(experiments);
    options.setStreaming(true);
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    Pipeline pipeline = Pipeline.create(options);
    PCollection<KV<Integer, String>> input = pipeline.apply(Create.of(Arrays.asList(KV.of(1, "1"), KV.of(2, "2"), KV.of(3, "3"))));
    if (withShardedKey) {
        input.apply(GroupIntoBatches.<Integer, String>ofSize(3).withShardedKey());
    } else {
        input.apply(GroupIntoBatches.ofSize(3));
    }
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    runner.replaceV1Transforms(pipeline);
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
    return translator.translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) KV(org.apache.beam.sdk.values.KV) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Pipeline(org.apache.beam.sdk.Pipeline)
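
For context, the two branches in the helper above produce differently typed outputs: withShardedKey() lets the runner split a hot key across shards, so batches are keyed by ShardedKey<K> rather than K. The sketch below is a standalone illustration of those result types, not part of the test; the method name applyBothVariants is made up.

import org.apache.beam.sdk.transforms.GroupIntoBatches;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.ShardedKey;

// Illustrative helper showing the output types of the two GroupIntoBatches expansions.
static void applyBothVariants(PCollection<KV<Integer, String>> input) {
    // Sharded: batches are keyed by ShardedKey<Integer>.
    PCollection<KV<ShardedKey<Integer>, Iterable<String>>> sharded =
        input.apply("Sharded", GroupIntoBatches.<Integer, String>ofSize(3).withShardedKey());
    // Unsharded: batches are keyed by the original Integer key.
    PCollection<KV<Integer, Iterable<String>>> unsharded =
        input.apply("Unsharded", GroupIntoBatches.<Integer, String>ofSize(3));
}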

Example 28 with PCollection

Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

From the class DataflowPipelineTranslatorTest, method runBatchGroupIntoBatchesAndGetJobSpec.

private JobSpecification runBatchGroupIntoBatchesAndGetJobSpec(Boolean withShardedKey, List<String> experiments) throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setExperiments(experiments);
    options.setStreaming(false);
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    Pipeline pipeline = Pipeline.create(options);
    PCollection<KV<Integer, String>> input = pipeline.apply(Create.of(Arrays.asList(KV.of(1, "1"), KV.of(2, "2"), KV.of(3, "3"))));
    if (withShardedKey) {
        input.apply(GroupIntoBatches.<Integer, String>ofSize(3).withShardedKey());
    } else {
        input.apply(GroupIntoBatches.ofSize(3));
    }
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    runner.replaceV1Transforms(pipeline);
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
    return translator.translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) KV(org.apache.beam.sdk.values.KV) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Pipeline(org.apache.beam.sdk.Pipeline)
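
A hypothetical call site for the streaming and batch helpers above might look like the fragment below. The empty experiment lists are placeholders only; the actual tests pass whichever experiments the scenario under test requires.

// Hypothetical invocations; the experiment arguments are placeholders.
JobSpecification streamingSharded =
    runStreamingGroupIntoBatchesAndGetJobSpec(true, Collections.emptyList());
JobSpecification batchUnsharded =
    runBatchGroupIntoBatchesAndGetJobSpec(false, Collections.emptyList());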

Example 29 with PCollection

Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

From the class DataflowPipelineTranslatorTest, method testResourceHintsTranslationsResolvesHintsOnOptionsAndComposites.

@Test
public void testResourceHintsTranslationsResolvesHintsOnOptionsAndComposites() {
    ResourceHintsOptions options = PipelineOptionsFactory.as(ResourceHintsOptions.class);
    options.setResourceHints(Arrays.asList("accelerator=set_via_options", "minRam=1B"));
    Pipeline pipeline = Pipeline.create(options);
    PCollection<byte[]> root = pipeline.apply(Impulse.create());
    root.apply(new Outer().setResourceHints(ResourceHints.create().withAccelerator("set_on_outer_transform").withMinRam(20)));
    root.apply("Leaf", ParDo.of(new IdentityDoFn<byte[]>()));
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, false);
    assertThat(
        pipelineProto
            .getComponents()
            .getEnvironmentsMap()
            .get(getLeafTransform(pipelineProto, "Leaf").getEnvironmentId())
            .getResourceHintsMap(),
        org.hamcrest.Matchers.allOf(
            org.hamcrest.Matchers.hasEntry(
                "beam:resources:min_ram_bytes:v1", ByteString.copyFromUtf8("1")),
            org.hamcrest.Matchers.hasEntry(
                "beam:resources:accelerator:v1", ByteString.copyFromUtf8("set_via_options"))));
    assertThat(
        pipelineProto
            .getComponents()
            .getEnvironmentsMap()
            .get(getLeafTransform(pipelineProto, "Innermost").getEnvironmentId())
            .getResourceHintsMap(),
        org.hamcrest.Matchers.allOf(
            org.hamcrest.Matchers.hasEntry(
                "beam:resources:min_ram_bytes:v1", ByteString.copyFromUtf8("20")),
            org.hamcrest.Matchers.hasEntry(
                "beam:resources:accelerator:v1", ByteString.copyFromUtf8("set_in_inner_transform"))));
}
Also used : ResourceHintsOptions(org.apache.beam.sdk.transforms.resourcehints.ResourceHintsOptions) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)
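
The Outer composite (and the inner transform that owns the "Innermost" leaf) is defined elsewhere in the test class. As an assumption-laden sketch of the shape the assertions imply, the composites might look roughly like the following: the accelerator hint is set on the inner composite, while the minRam hint is inherited from the hints attached to Outer at the apply site above. The class bodies and the PassThroughFn stand-in for the test's IdentityDoFn are illustrative, not the actual fixtures.

import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.resourcehints.ResourceHints;
import org.apache.beam.sdk.values.PCollection;

// Illustrative identity DoFn (the real IdentityDoFn is defined in the test class).
class PassThroughFn extends DoFn<byte[], byte[]> {
    @ProcessElement
    public void process(@Element byte[] element, OutputReceiver<byte[]> out) {
        out.output(element);
    }
}

// Inner composite: owns the "Innermost" leaf ParDo.
class Inner extends PTransform<PCollection<byte[]>, PCollection<byte[]>> {
    @Override
    public PCollection<byte[]> expand(PCollection<byte[]> input) {
        return input.apply("Innermost", ParDo.of(new PassThroughFn()));
    }
}

// Outer composite: applies Inner with its own accelerator hint, so the "Innermost"
// environment ends up with accelerator=set_in_inner_transform, while min_ram=20
// is inherited from the hints set on Outer where it is applied in the test.
class Outer extends PTransform<PCollection<byte[]>, PCollection<byte[]>> {
    @Override
    public PCollection<byte[]> expand(PCollection<byte[]> input) {
        return input.apply(
            "Inner",
            new Inner()
                .setResourceHints(
                    ResourceHints.create().withAccelerator("set_in_inner_transform")));
    }
}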

Example 30 with PCollection

Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

From the class GreedyStageFuser, method anyInputsSideInputs.

private static boolean anyInputsSideInputs(PTransformNode consumer, QueryablePipeline pipeline) {
    for (String inputPCollectionId : consumer.getTransform().getInputsMap().values()) {
        RunnerApi.PCollection pCollection = pipeline.getComponents().getPcollectionsMap().get(inputPCollectionId);
        PCollectionNode pCollectionNode = PipelineNode.pCollection(inputPCollectionId, pCollection);
        if (!pipeline.getSingletonConsumers(pCollectionNode).isEmpty()) {
            return true;
        }
    }
    return false;
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PCollectionNode(org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)
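
As a small usage illustration of the lookup-and-wrap pattern the method above applies to each input, the sketch below resolves a PCollection id against the pipeline components and wraps the proto in a PCollectionNode. The helper name pCollectionNodeFor is made up for illustration; it is not Beam API.

import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.runners.core.construction.graph.PipelineNode;
import org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode;

// Illustrative helper: fetch the RunnerApi.PCollection proto by id and wrap it
// in a graph node, mirroring what anyInputsSideInputs does per input.
static PCollectionNode pCollectionNodeFor(RunnerApi.Components components, String pCollectionId) {
    RunnerApi.PCollection pCollection = components.getPcollectionsMap().get(pCollectionId);
    return PipelineNode.pCollection(pCollectionId, pCollection);
}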

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi): 45
Test (org.junit.Test): 45
Pipeline (org.apache.beam.sdk.Pipeline): 25
PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform): 24
PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection): 22
PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode): 22
Map (java.util.Map): 21
Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components): 21
PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode): 21
ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 17
ArrayList (java.util.ArrayList): 16
HashMap (java.util.HashMap): 14
Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment): 13
SdkComponents (org.apache.beam.runners.core.construction.SdkComponents): 13
PCollection (org.apache.beam.sdk.values.PCollection): 12
Coder (org.apache.beam.sdk.coders.Coder): 11
KV (org.apache.beam.sdk.values.KV): 11
Collection (java.util.Collection): 10
ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap): 10
IOException (java.io.IOException): 9