Example 16 with SdkComponents

Use of org.apache.beam.runners.core.construction.SdkComponents in the Apache Beam project.

From the class DataflowPipelineTranslatorTest, method testStreamingSplittableParDoTranslation.

/**
 * Smoke test to fail fast if translation of a splittable ParDo in streaming breaks.
 */
@Test
public void testStreamingSplittableParDoTranslation() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    options.setStreaming(true);
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    Pipeline pipeline = Pipeline.create(options);
    PCollection<String> windowedInput = pipeline.apply(Create.of("a")).apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))));
    windowedInput.apply(ParDo.of(new TestSplittableFn()));
    runner.replaceV1Transforms(pipeline);
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
    Job job = translator.translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList()).getJob();
    // The job should contain a SplittableParDo.ProcessKeyedElements step, translated as
    // "SplittableProcessKeyed".
    List<Step> steps = job.getSteps();
    Step processKeyedStep = null;
    for (Step step : steps) {
        if ("SplittableProcessKeyed".equals(step.getKind())) {
            assertNull(processKeyedStep);
            processKeyedStep = step;
        }
    }
    assertNotNull(processKeyedStep);
    @SuppressWarnings({ "unchecked", "rawtypes" }) DoFnInfo<String, Integer> fnInfo = (DoFnInfo<String, Integer>) SerializableUtils.deserializeFromByteArray(jsonStringToByteArray(getString(processKeyedStep.getProperties(), PropertyNames.SERIALIZED_FN)), "DoFnInfo");
    assertThat(fnInfo.getDoFn(), instanceOf(TestSplittableFn.class));
    assertThat(
        fnInfo.getWindowingStrategy().getWindowFn(),
        Matchers.<WindowFn>equalTo(FixedWindows.of(Duration.standardMinutes(1))));
    assertThat(fnInfo.getInputCoder(), instanceOf(StringUtf8Coder.class));
    Coder<?> restrictionCoder =
        CloudObjects.coderFromCloudObject(
            (CloudObject) Structs.getObject(processKeyedStep.getProperties(), PropertyNames.RESTRICTION_CODER));
    assertEquals(KvCoder.of(SerializableCoder.of(OffsetRange.class), VoidCoder.of()), restrictionCoder);
}
Also used: DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions), DoFnInfo (org.apache.beam.sdk.util.DoFnInfo), Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString), ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString), Step (com.google.api.services.dataflow.model.Step), SdkComponents (org.apache.beam.runners.core.construction.SdkComponents), Pipeline (org.apache.beam.sdk.Pipeline), RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi), StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder), Job (com.google.api.services.dataflow.model.Job), Test (org.junit.Test)
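The test relies on two helpers that this excerpt does not show: TestSplittableFn and createSdkComponents. The sketch below is a plausible reconstruction inferred from the assertions above (the restriction-coder assertion implies an OffsetRange restriction whose coder falls back to SerializableCoder), not the verbatim helpers from DataflowPipelineTranslatorTest:

// Hypothetical reconstruction of the helpers referenced by the tests on this page.
private static class TestSplittableFn extends DoFn<String, Integer> {

    @ProcessElement
    public void process(ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) {
        // No-op: translation-only tests never execute the fn body.
    }

    @GetInitialRestriction
    public OffsetRange getInitialRestriction(@Element String element) {
        // Without a @GetRestrictionCoder method, coder inference can fall back to
        // SerializableCoder.of(OffsetRange.class), matching the assertion above.
        return new OffsetRange(0, 1);
    }
}

private static SdkComponents createSdkComponents(PipelineOptions options) {
    SdkComponents sdkComponents = SdkComponents.create();
    String containerImageURL =
        DataflowRunner.getContainerImageForJob(options.as(DataflowPipelineOptions.class));
    sdkComponents.registerEnvironment(Environments.createDockerEnvironment(containerImageURL));
    return sdkComponents;
}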

Example 17 with SdkComponents

Use of org.apache.beam.runners.core.construction.SdkComponents in the Apache Beam project.

From the class DataflowPipelineTranslatorTest, method testToList.

/**
 * Testing just the translation of the pipeline from ViewTest#testToList.
 */
@Test
public void testToList() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    Pipeline pipeline = Pipeline.create(options);
    final PCollectionView<List<Integer>> view = pipeline.apply("CreateSideInput", Create.of(11, 13, 17, 23)).apply(View.asList());
    pipeline.apply("CreateMainInput", Create.of(29, 31)).apply("OutputSideInputs", ParDo.of(new DoFn<Integer, Integer>() {

        @ProcessElement
        public void processElement(ProcessContext c) {
            // The list view should materialize all four side-input elements;
            // repeated reads of the same element should be consistent.
            checkArgument(c.sideInput(view).size() == 4);
            checkArgument(c.sideInput(view).get(0).equals(c.sideInput(view).get(0)));
            for (Integer i : c.sideInput(view)) {
                c.output(i);
            }
        }
    }).withSideInputs(view));
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    runner.replaceV1Transforms(pipeline);
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
    Job job = translator.translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList()).getJob();
    List<Step> steps = job.getSteps();
    // Change detector assertion just to make sure the test was not a noop.
    // No need to actually check the pipeline as the ValidatesRunner tests
    // ensure translation is correct. This is just a quick check to see that translation
    // does not crash.
    assertEquals(5, steps.size());
}
Also used: DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions), Step (com.google.api.services.dataflow.model.Step), SdkComponents (org.apache.beam.runners.core.construction.SdkComponents), Pipeline (org.apache.beam.sdk.Pipeline), RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi), ArrayList (java.util.ArrayList), List (java.util.List), ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList), TupleTagList (org.apache.beam.sdk.values.TupleTagList), Job (com.google.api.services.dataflow.model.Job), Test (org.junit.Test)
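Every example on this page repeats the same translation sequence. Factoring it out makes the flow easier to see; translateToJob is our name for this hypothetical helper, assembled only from statements that appear in the tests above:

// Sketch of the shared construct-proto-then-translate flow used by each test.
private static Job translateToJob(Pipeline pipeline, DataflowPipelineOptions options) throws Exception {
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    // Swap SDK transforms for their Dataflow-specific implementations.
    runner.replaceV1Transforms(pipeline);
    SdkComponents sdkComponents = createSdkComponents(options);
    // Serialize the pipeline into the portable Runner API proto.
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
    return translator
        .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
        .getJob();
}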

Example 18 with SdkComponents

Use of org.apache.beam.runners.core.construction.SdkComponents in the Apache Beam project.

From the class DataflowPipelineTranslatorTest, method testSetWorkerHarnessContainerImageInPipelineProto.

/**
 * Tests that when (deprecated) {@link
 * DataflowPipelineOptions#setWorkerHarnessContainerImage(String)} pipeline option is set, {@link
 * DataflowRunner} sets that value as the {@link DockerPayload#getContainerImage()} of the default
 * {@link Environment} used when generating the model pipeline proto.
 */
@Test
public void testSetWorkerHarnessContainerImageInPipelineProto() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    String containerImage = "gcr.io/image:foo";
    options.as(DataflowPipelineOptions.class).setWorkerHarnessContainerImage(containerImage);
    Pipeline p = Pipeline.create(options);
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline proto = PipelineTranslation.toProto(p, sdkComponents, true);
    JobSpecification specification =
        DataflowPipelineTranslator.fromOptions(options)
            .translate(p, proto, sdkComponents, DataflowRunner.fromOptions(options), Collections.emptyList());
    RunnerApi.Pipeline pipelineProto = specification.getPipelineProto();
    assertEquals(1, pipelineProto.getComponents().getEnvironmentsCount());
    Environment defaultEnvironment = Iterables.getOnlyElement(pipelineProto.getComponents().getEnvironmentsMap().values());
    DockerPayload payload = DockerPayload.parseFrom(defaultEnvironment.getPayload());
    assertEquals(DataflowRunner.getContainerImageForJob(options), payload.getContainerImage());
}
Also used: RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi), DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions), Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment), Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString), ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString), JobSpecification (org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification), SdkComponents (org.apache.beam.runners.core.construction.SdkComponents), DockerPayload (org.apache.beam.model.pipeline.v1.RunnerApi.DockerPayload), Pipeline (org.apache.beam.sdk.Pipeline), Test (org.junit.Test)
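Since workerHarnessContainerImage is deprecated, newer Beam releases expose the same setting as sdkContainerImage; whether your version has it is an assumption to verify against your DataflowPipelineOptions:

// Assumes a Beam version where sdkContainerImage replaces workerHarnessContainerImage.
options.as(DataflowPipelineOptions.class).setSdkContainerImage("gcr.io/image:foo");

Either way, the assertion pattern stays the same: parse the DockerPayload out of the default environment and compare its container image against DataflowRunner.getContainerImageForJob(options).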

Example 19 with SdkComponents

Use of org.apache.beam.runners.core.construction.SdkComponents in the Apache Beam project.

From the class DataflowPipelineTranslatorTest, method testBatchStatefulParDoTranslation.

/**
 * Smoke test to fail fast if translation of a stateful ParDo in batch breaks.
 */
@Test
public void testBatchStatefulParDoTranslation() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    options.setStreaming(false);
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    Pipeline pipeline = Pipeline.create(options);
    TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {};
    pipeline.apply(Create.of(KV.of(1, 1))).apply(ParDo.of(new DoFn<KV<Integer, Integer>, Integer>() {

        @StateId("unused")
        final StateSpec<ValueState<Integer>> stateSpec = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void process(ProcessContext c) {
            // noop
        }
    }).withOutputTags(mainOutputTag, TupleTagList.empty()));
    runner.replaceV1Transforms(pipeline);
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
    Job job = translator.translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList()).getJob();
    // The job should look like:
    // 0. ParallelRead (Create)
    // 1. ParDo(ReifyWVs)
    // 2. GroupByKeyAndSortValuesOnly
    // 3. A ParDo over grouped and sorted KVs that is executed via ungrouping service-side
    List<Step> steps = job.getSteps();
    assertEquals(4, steps.size());
    Step createStep = steps.get(0);
    assertEquals("ParallelRead", createStep.getKind());
    Step reifyWindowedValueStep = steps.get(1);
    assertEquals("ParallelDo", reifyWindowedValueStep.getKind());
    Step gbkStep = steps.get(2);
    assertEquals("GroupByKey", gbkStep.getKind());
    Step statefulParDoStep = steps.get(3);
    assertEquals("ParallelDo", statefulParDoStep.getKind());
    assertThat((String) statefulParDoStep.getProperties().get(PropertyNames.USES_KEYED_STATE), not(equalTo("true")));
}
Also used: DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions), TupleTag (org.apache.beam.sdk.values.TupleTag), KV (org.apache.beam.sdk.values.KV), Step (com.google.api.services.dataflow.model.Step), SdkComponents (org.apache.beam.runners.core.construction.SdkComponents), Pipeline (org.apache.beam.sdk.Pipeline), RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi), ValueState (org.apache.beam.sdk.state.ValueState), Job (com.google.api.services.dataflow.model.Job), Test (org.junit.Test)
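The DoFn above declares @StateId("unused") purely to force the batch stateful-ParDo translation path; its process method never touches the state. For contrast, here is a generic Beam state-API sketch (not part of the test) showing what reading and writing that ValueState looks like:

// Illustrative stateful DoFn; keeps a running per-key sum in value state.
private static class RunningSumFn extends DoFn<KV<Integer, Integer>, Integer> {

    @StateId("sum")
    private final StateSpec<ValueState<Integer>> sumSpec = StateSpecs.value(VarIntCoder.of());

    @ProcessElement
    public void process(ProcessContext c, @StateId("sum") ValueState<Integer> sum) {
        // State is scoped per key and window; read() returns null before the first write.
        Integer current = sum.read();
        int updated = (current == null ? 0 : current) + c.element().getValue();
        sum.write(updated);
        c.output(updated);
    }
}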

Example 20 with SdkComponents

Use of org.apache.beam.runners.core.construction.SdkComponents in the Apache Beam project.

From the class DataflowPipelineTranslatorTest, method testStepResourceHints.

@Test
public void testStepResourceHints() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    Pipeline pipeline = Pipeline.create(options);
    pipeline
        .apply(Create.of(1, 2, 3))
        .apply(
            "Has hints",
            MapElements.into(TypeDescriptors.integers())
                .via((Integer x) -> x + 1)
                .setResourceHints(
                    ResourceHints.create()
                        .withMinRam("10.0GiB")
                        .withAccelerator("type:nvidia-tesla-k80;count:1;install-nvidia-driver")));
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    runner.replaceV1Transforms(pipeline);
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
    Job job = translator.translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList()).getJob();
    Step stepWithHints = job.getSteps().get(1);
    ImmutableMap<String, Object> expectedHints =
        ImmutableMap.<String, Object>builder()
            .put("beam:resources:min_ram_bytes:v1", "10737418240")
            .put("beam:resources:accelerator:v1", "type:nvidia-tesla-k80;count:1;install-nvidia-driver")
            .build();
    assertEquals(expectedHints, stepWithHints.getProperties().get("resource_hints"));
}
Also used: DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions), Step (com.google.api.services.dataflow.model.Step), Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString), ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString), SdkComponents (org.apache.beam.runners.core.construction.SdkComponents), Pipeline (org.apache.beam.sdk.Pipeline), RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi), CloudObject (org.apache.beam.runners.dataflow.util.CloudObject), Job (com.google.api.services.dataflow.model.Job), Test (org.junit.Test)
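The min_ram hint is normalized to bytes during translation: 10.0 GiB is 10 * 1024^3 = 10737418240, which is exactly the string asserted for beam:resources:min_ram_bytes:v1. A quick check of the arithmetic:

// 10.0 GiB expressed in bytes; matches the expected "beam:resources:min_ram_bytes:v1" value.
long minRamBytes = 10L * 1024 * 1024 * 1024;
assert "10737418240".equals(Long.toString(minRamBytes));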

Aggregations

SdkComponents (org.apache.beam.runners.core.construction.SdkComponents): 61 usages
RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi): 48 usages
Test (org.junit.Test): 46 usages
Pipeline (org.apache.beam.sdk.Pipeline): 37 usages
DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions): 36 usages
Job (com.google.api.services.dataflow.model.Job): 25 usages
ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString): 25 usages
Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString): 21 usages
KV (org.apache.beam.sdk.values.KV): 14 usages
Map (java.util.Map): 12 usages
Step (com.google.api.services.dataflow.model.Step): 11 usages
ArrayList (java.util.ArrayList): 11 usages
List (java.util.List): 9 usages
CloudObject (org.apache.beam.runners.dataflow.util.CloudObject): 9 usages
HashMap (java.util.HashMap): 8 usages
ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList): 8 usages
WindowedValue (org.apache.beam.sdk.util.WindowedValue): 7 usages
ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap): 7 usages
InstructionOutput (com.google.api.services.dataflow.model.InstructionOutput): 6 usages
ParDoInstruction (com.google.api.services.dataflow.model.ParDoInstruction): 6 usages