Search in sources :

Example 1 with DoFnInfo

use of org.apache.beam.sdk.util.DoFnInfo in project beam by apache.

the class DataflowPipelineTranslatorTest method testStreamingSplittableParDoTranslation.

/**
 * Fail-fast smoke test for streaming translation of a splittable ParDo.
 *
 * <p>Builds a one-element pipeline windowed into one-minute fixed windows, applies
 * {@code TestSplittableFn}, translates it, and then checks that the resulting job has exactly one
 * step of kind "SplittableProcessKeyed" whose serialized {@code DoFnInfo} and restriction coder
 * match what was put in.
 */
@Test
public void testStreamingSplittableParDoTranslation() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    // Ordering is deliberate-as-written: the runner is built first, streaming is switched on
    // afterwards, and only then is the translator created from the same options.
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    options.setStreaming(true);
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

    // Single window fn shared by the pipeline construction and the later equality check.
    FixedWindows oneMinuteWindows = FixedWindows.of(Duration.standardMinutes(1));

    Pipeline pipeline = Pipeline.create(options);
    PCollection<String> windowed = pipeline.apply(Create.of("a")).apply(Window.into(oneMinuteWindows));
    windowed.apply(ParDo.of(new TestSplittableFn()));
    runner.replaceV1Transforms(pipeline);

    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
    Job job = translator.translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList()).getJob();

    // Locate the SplittableParDo.ProcessKeyedElements step, which is translated with the kind
    // "SplittableProcessKeyed". A second match trips the assertNull; no match trips assertNotNull.
    Step splittableStep = null;
    List<Step> allSteps = job.getSteps();
    for (Step candidate : allSteps) {
        if (!"SplittableProcessKeyed".equals(candidate.getKind())) {
            continue;
        }
        assertNull(splittableStep);
        splittableStep = candidate;
    }
    assertNotNull(splittableStep);

    // Decode the serialized DoFnInfo from the step properties: JSON string -> bytes -> object.
    String serializedFnJson = getString(splittableStep.getProperties(), PropertyNames.SERIALIZED_FN);
    byte[] serializedFnBytes = jsonStringToByteArray(serializedFnJson);
    @SuppressWarnings({ "unchecked", "rawtypes" }) DoFnInfo<String, Integer> decodedInfo = (DoFnInfo<String, Integer>) SerializableUtils.deserializeFromByteArray(serializedFnBytes, "DoFnInfo");

    assertThat(decodedInfo.getDoFn(), instanceOf(TestSplittableFn.class));
    assertThat(decodedInfo.getWindowingStrategy().getWindowFn(), Matchers.<WindowFn>equalTo(oneMinuteWindows));
    assertThat(decodedInfo.getInputCoder(), instanceOf(StringUtf8Coder.class));

    // The restriction coder is stored as a CloudObject under its own property.
    CloudObject restrictionCoderSpec = (CloudObject) Structs.getObject(splittableStep.getProperties(), PropertyNames.RESTRICTION_CODER);
    Coder<?> actualRestrictionCoder = CloudObjects.coderFromCloudObject(restrictionCoderSpec);
    Coder<?> expectedRestrictionCoder = KvCoder.of(SerializableCoder.of(OffsetRange.class), VoidCoder.of());
    assertEquals(expectedRestrictionCoder, actualRestrictionCoder);
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) DoFnInfo(org.apache.beam.sdk.util.DoFnInfo) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Step(com.google.api.services.dataflow.model.Step) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Pipeline(org.apache.beam.sdk.Pipeline) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) Job(com.google.api.services.dataflow.model.Job) Test(org.junit.Test)

Example 2 with DoFnInfo

use of org.apache.beam.sdk.util.DoFnInfo in project beam by apache.

the class DefaultParDoFnFactoryTest method testCreateSimpleParDoFn.

/**
 * Tests that a {@link SimpleParDoFn} is correctly dispatched to {@code UserParDoFnFactory} and
 * instantiated correctly.
 *
 * <p>A {@code TestDoFn} carrying known field values is wrapped in a {@code DoFnInfo}, serialized
 * into a "DoFn" {@code CloudObject}, and handed to {@code DEFAULT_FACTORY}. The test then checks
 * the created {@code ParDoFn}'s class, the windowing strategy of its {@code DoFnInfo}, and that
 * the deserialized user fn still holds the original field values.
 */
@Test
public void testCreateSimpleParDoFn() throws Exception {
    // A serialized DoFn
    String stringFieldValue = "some state";
    long longFieldValue = 42L;
    TestDoFn fn = new TestDoFn(stringFieldValue, longFieldValue);
    String serializedFn = StringUtils.byteArrayToJsonString(SerializableUtils.serializeToByteArray(DoFnInfo.forFn(fn, WindowingStrategy.globalDefault(),
    null /* side input views */,
    null /* input coder */,
    new TupleTag<>("output") /* main output */,
    DoFnSchemaInformation.create(), Collections.emptyMap())));
    CloudObject cloudUserFn = CloudObject.forClassName("DoFn");
    addString(cloudUserFn, "serialized_fn", serializedFn);
    // Create the ParDoFn from the serialized DoFn
    ParDoFn parDoFn = DEFAULT_FACTORY.create(DEFAULT_OPTIONS, cloudUserFn, null, MAIN_OUTPUT, ImmutableMap.<TupleTag<?>, Integer>of(MAIN_OUTPUT, 0), DEFAULT_EXECUTION_CONTEXT, TestOperationContext.create(counterSet));
    // Test that the factory created the correct class
    assertThat(parDoFn, instanceOf(SimpleParDoFn.class));
    // TODO: move the asserts below into new tests in UserParDoFnFactoryTest, and this test should
    // simply assert that DefaultParDoFnFactory.create() matches UserParDoFnFactory.create()
    // Test that the DoFnInfo reflects the one passed in
    SimpleParDoFn simpleParDoFn = (SimpleParDoFn) parDoFn;
    parDoFn.startBundle(new OutputReceiver());
    // DoFnInfo may not yet be initialized until an element is processed
    parDoFn.processElement(WindowedValue.valueInGlobalWindow("foo"));
    @SuppressWarnings("rawtypes") DoFnInfo doFnInfo = simpleParDoFn.getDoFnInfo();
    // Assert the runtime type BEFORE casting: casting first would make the instanceOf check
    // unfalsifiable, since a wrong type would already have thrown ClassCastException.
    DoFn innerDoFn = doFnInfo.getDoFn();
    assertThat(innerDoFn, instanceOf(TestDoFn.class));
    assertThat(doFnInfo.getWindowingStrategy().getWindowFn(), instanceOf(GlobalWindows.class));
    assertThat(doFnInfo.getWindowingStrategy().getTrigger(), instanceOf(DefaultTrigger.class));
    // Test that the deserialized user DoFn is as expected
    TestDoFn actualTestDoFn = (TestDoFn) innerDoFn;
    assertEquals(stringFieldValue, actualTestDoFn.stringField);
    assertEquals(longFieldValue, actualTestDoFn.longField);
}
Also used : GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) DoFnInfo(org.apache.beam.sdk.util.DoFnInfo) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString) ParDoFn(org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn) DoFn(org.apache.beam.sdk.transforms.DoFn) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) DefaultTrigger(org.apache.beam.sdk.transforms.windowing.DefaultTrigger) Test(org.junit.Test)

Aggregations

DoFnInfo (org.apache.beam.sdk.util.DoFnInfo)2 Test (org.junit.Test)2 Job (com.google.api.services.dataflow.model.Job)1 Step (com.google.api.services.dataflow.model.Step)1 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)1 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)1 DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)1 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)1 Structs.addString (org.apache.beam.runners.dataflow.util.Structs.addString)1 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)1 OutputReceiver (org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver)1 ParDoFn (org.apache.beam.runners.dataflow.worker.util.common.worker.ParDoFn)1 Pipeline (org.apache.beam.sdk.Pipeline)1 StringUtf8Coder (org.apache.beam.sdk.coders.StringUtf8Coder)1 DoFn (org.apache.beam.sdk.transforms.DoFn)1 DefaultTrigger (org.apache.beam.sdk.transforms.windowing.DefaultTrigger)1 GlobalWindows (org.apache.beam.sdk.transforms.windowing.GlobalWindows)1 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)1