Search in sources:

Example 46 with DoFn

use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.

In the class FlinkSavepointTest, method createStreamingJob.

/**
 * Assembles the streaming job: a keyed source followed by a stateful "VerificationStage".
 *
 * <p>The keyed source depends on {@code isPortablePipeline}:
 *
 * <ul>
 *   <li>portable: "ImpulseStage" -> "KvMapperStage" -> "TimerStage", where the timer stage keeps a
 *       counter in state, emits {@code KV("key", counter)} every time its timer fires and then
 *       re-arms the timer;
 *   <li>otherwise: "IdGeneratorStage" ({@code GenerateSequence.from(0)}) -> "KvMapperStage", which
 *       wraps each generated number under the constant key {@code "key"}.
 * </ul>
 *
 * <p>The verification stage depends on {@code restored}: when {@code true} it asserts on the
 * contents of "valueState"/"bagState" and counts down {@code oneShotLatch}; when {@code false} it
 * writes those states on the element with value 0 and counts down the latch for values >= 1.
 *
 * @param pipeline pipeline the stages are applied to
 * @param restored selects the asserting variant of the verification stage instead of the writing one
 * @param isPortablePipeline selects the Impulse/timer based source instead of GenerateSequence
 * @return the output of the "VerificationStage" transform
 */
private static PCollection createStreamingJob(Pipeline pipeline, boolean restored, boolean isPortablePipeline) {
    final PCollection<KV<String, Long>> keyedInput;
    if (isPortablePipeline) {
        PCollection<byte[]> impulse = pipeline.apply("ImpulseStage", Impulse.create());

        PCollection<KV<String, Void>> keyedImpulse = impulse.apply("KvMapperStage", MapElements.via(new InferableFunction<byte[], KV<String, Void>>() {

            @Override
            public KV<String, Void> apply(byte[] input) throws Exception {
                // https://jira.apache.org/jira/browse/BEAM-7144
                return KV.of("key", null);
            }
        }));

        keyedInput = keyedImpulse.apply("TimerStage", ParDo.of(new DoFn<KV<String, Void>, KV<String, Long>>() {

            @StateId("nextInteger")
            private final StateSpec<ValueState<Long>> valueStateSpec = StateSpecs.value();

            @TimerId("timer")
            private final TimerSpec timer = TimerSpecs.timer(TimeDomain.EVENT_TIME);

            // Arms the event-time timer for the first time when the single impulse element arrives.
            @ProcessElement
            public void processElement(ProcessContext context, @TimerId("timer") Timer timer) {
                timer.set(new Instant(0));
            }

            // Emits the current counter, stores its successor and re-arms the timer.
            @OnTimer("timer")
            public void onTimer(OnTimerContext context, @StateId("nextInteger") ValueState<Long> nextInteger, @TimerId("timer") Timer timer) {
                Long current = nextInteger.read();
                if (current == null) {
                    // Nothing stored yet: start counting from zero.
                    current = 0L;
                }
                context.output(KV.of("key", current));
                LOG.debug("triggering timer {}", current);
                nextInteger.write(current + 1);
                // Trigger timer again and continue to hold back the watermark
                timer.withOutputTimestamp(new Instant(0)).set(context.fireTimestamp());
            }
        }));
    } else {
        PCollection<Long> sequence = pipeline.apply("IdGeneratorStage", GenerateSequence.from(0));

        keyedInput = sequence.apply("KvMapperStage", ParDo.of(new DoFn<Long, KV<String, Long>>() {

            @ProcessElement
            public void processElement(ProcessContext context) {
                Long element = context.element();
                context.output(KV.of("key", element));
            }
        }));
    }

    if (restored) {
        // Restored job: the states are expected to already hold the values written before.
        return keyedInput.apply("VerificationStage", ParDo.of(new DoFn<KV<String, Long>, String>() {

            @StateId("valueState")
            private final StateSpec<ValueState<Integer>> valueStateSpec = StateSpecs.value();

            @StateId("bagState")
            private final StateSpec<BagState<Integer>> bagStateSpec = StateSpecs.bag();

            @ProcessElement
            public void processElement(ProcessContext context, @StateId("valueState") ValueState<Integer> intValueState, @StateId("bagState") BagState<Integer> intBagState) {
                assertThat(intValueState.read(), Matchers.is(42));
                assertThat(intBagState.read(), IsIterableContaining.hasItems(40, 1, 1));
                oneShotLatch.countDown();
            }
        }));
    }

    // Fresh job: populate the states on the first element, then signal on later elements.
    return keyedInput.apply("VerificationStage", ParDo.of(new DoFn<KV<String, Long>, String>() {

        @StateId("valueState")
        private final StateSpec<ValueState<Integer>> valueStateSpec = StateSpecs.value();

        @StateId("bagState")
        private final StateSpec<BagState<Integer>> bagStateSpec = StateSpecs.bag();

        @ProcessElement
        public void processElement(ProcessContext context, @StateId("valueState") ValueState<Integer> intValueState, @StateId("bagState") BagState<Integer> intBagState) {
            long value = Objects.requireNonNull(context.element().getValue());
            LOG.debug("value: {} timestamp: {}", value, context.timestamp().getMillis());
            if (value == 0L) {
                intValueState.write(42);
                intBagState.add(40);
                intBagState.add(1);
                intBagState.add(1);
            } else if (value > 0L) {
                oneShotLatch.countDown();
            }
        }
    }));
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) BagState(org.apache.beam.sdk.state.BagState) TimerSpec(org.apache.beam.sdk.state.TimerSpec) InferableFunction(org.apache.beam.sdk.transforms.InferableFunction) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) ValueState(org.apache.beam.sdk.state.ValueState) DoFn(org.apache.beam.sdk.transforms.DoFn) Timer(org.apache.beam.sdk.state.Timer)

Example 47 with DoFn

use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.

In the class DataflowRunnerTest, method verifySetStateUnsupported.

/**
 * Runs a pipeline whose stateful {@code ParDo} declares a {@code SetState} and expects the run to
 * be rejected.
 *
 * <p>The expectation registered on {@code thrown} is an {@link UnsupportedOperationException}
 * whose message contains {@code "SetState"}.
 *
 * @param options pipeline options the pipeline is created from
 * @throws Exception declared by the signature; nothing is caught in this method
 */
private void verifySetStateUnsupported(PipelineOptions options) throws Exception {
    Pipeline pipeline = Pipeline.create(options);

    // The DoFn does no work; only its SetState declaration matters for this check.
    DoFn<KV<Integer, Integer>, Void> setStateFn = new DoFn<KV<Integer, Integer>, Void>() {

        @StateId("fizzle")
        private final StateSpec<SetState<Void>> voidState = StateSpecs.set();

        @ProcessElement
        public void process() {
        }
    };
    pipeline.apply(Create.of(KV.of(13, 42))).apply(ParDo.of(setStateFn));

    thrown.expectMessage("SetState");
    thrown.expect(UnsupportedOperationException.class);
    pipeline.run();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) DoFn(org.apache.beam.sdk.transforms.DoFn) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline)

Example 48 with DoFn

use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.

In the class DataflowRunnerTest, method verifyMapStateUnsupported.

/**
 * Runs a pipeline whose stateful {@code ParDo} declares a {@code MapState} and expects the run to
 * be rejected.
 *
 * <p>The expectation registered on {@code thrown} is an {@link UnsupportedOperationException}
 * whose message contains {@code "MapState"}.
 *
 * @param options pipeline options the pipeline is created from
 * @throws Exception declared by the signature; nothing is caught in this method
 */
private void verifyMapStateUnsupported(PipelineOptions options) throws Exception {
    Pipeline pipeline = Pipeline.create(options);

    // The DoFn does no work; only its MapState declaration matters for this check.
    DoFn<KV<Integer, Integer>, Void> mapStateFn = new DoFn<KV<Integer, Integer>, Void>() {

        @StateId("fizzle")
        private final StateSpec<MapState<Void, Void>> voidState = StateSpecs.map();

        @ProcessElement
        public void process() {
        }
    };
    pipeline.apply(Create.of(KV.of(13, 42))).apply(ParDo.of(mapStateFn));

    thrown.expectMessage("MapState");
    thrown.expect(UnsupportedOperationException.class);
    pipeline.run();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) DoFn(org.apache.beam.sdk.transforms.DoFn) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline)

Example 49 with DoFn

use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.

In the class DataflowRunnerTest, method verifyMergingStatefulParDoRejected.

/**
 * Runs a pipeline that applies a stateful {@code ParDo} after windowing into sessions and expects
 * the run to be rejected.
 *
 * <p>The expectation registered on {@code thrown} is an {@link UnsupportedOperationException}
 * whose message contains {@code "merging"}.
 *
 * @param options pipeline options the pipeline is created from
 * @throws Exception declared by the signature; nothing is caught in this method
 */
private void verifyMergingStatefulParDoRejected(PipelineOptions options) throws Exception {
    Pipeline pipeline = Pipeline.create(options);
    Duration sessionGap = Duration.millis(1);

    // The DoFn does no work; it only needs to declare some state after the session windowing.
    DoFn<KV<Integer, Integer>, Void> statefulFn = new DoFn<KV<Integer, Integer>, Void>() {

        @StateId("fizzle")
        private final StateSpec<ValueState<Void>> voidState = StateSpecs.value();

        @ProcessElement
        public void process() {
        }
    };
    pipeline.apply(Create.of(KV.of(13, 42))).apply(Window.into(Sessions.withGapDuration(sessionGap))).apply(ParDo.of(statefulFn));

    thrown.expectMessage("merging");
    thrown.expect(UnsupportedOperationException.class);
    pipeline.run();
}
Also used : StateSpec(org.apache.beam.sdk.state.StateSpec) DoFn(org.apache.beam.sdk.transforms.DoFn) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline)

Example 50 with DoFn

use of org.apache.beam.sdk.transforms.DoFn in project beam by apache.

In the class BatchStatefulParDoOverridesTest, method testFnApiMultiOutputOverrideNonCrashing.

/**
 * Applies a multi-output {@code ParDo} wrapping a {@code DummyStatefulDoFn}, lets the
 * {@code DataflowRunner} replace V1 transforms, and asserts that the DoFn located by
 * {@code findBatchStatefulDoFn} equals the original one.
 *
 * <p>Currently disabled; see the {@code @Ignore} reason (BEAM-2902).
 */
@Test
@Ignore(
    "TODO: BEAM-2902 Add support for user state in a ParDo.Multi once PTransformMatcher "
        + "exposes a way to know when the replacement is not required by checking that the "
        + "preceding ParDos to a GBK are key preserving.")
public void testFnApiMultiOutputOverrideNonCrashing() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setRunner(DataflowRunner.class);
    Pipeline pipeline = Pipeline.create(options);

    // Anonymous subclasses, exactly as in the usual TupleTag idiom.
    TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {
    };
    TupleTag<Integer> sideOutputTag = new TupleTag<Integer>() {
    };
    TupleTagList additionalOutputTags = TupleTagList.of(sideOutputTag);

    DummyStatefulDoFn statefulFn = new DummyStatefulDoFn();
    pipeline.apply(Create.of(KV.of(1, 2))).apply(ParDo.of(statefulFn).withOutputTags(mainOutputTag, additionalOutputTags));

    DataflowRunner dataflowRunner = DataflowRunner.fromOptions(options);
    dataflowRunner.replaceV1Transforms(pipeline);

    assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) statefulFn));
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) DoFn(org.apache.beam.sdk.transforms.DoFn) TupleTag(org.apache.beam.sdk.values.TupleTag) Pipeline(org.apache.beam.sdk.Pipeline) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

DoFn (org.apache.beam.sdk.transforms.DoFn)154 Test (org.junit.Test)98 Pipeline (org.apache.beam.sdk.Pipeline)60 KV (org.apache.beam.sdk.values.KV)45 TupleTag (org.apache.beam.sdk.values.TupleTag)28 StateSpec (org.apache.beam.sdk.state.StateSpec)26 Instant (org.joda.time.Instant)26 ArrayList (java.util.ArrayList)23 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)23 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)22 PCollection (org.apache.beam.sdk.values.PCollection)21 TimerSpec (org.apache.beam.sdk.state.TimerSpec)19 WindowedValue (org.apache.beam.sdk.util.WindowedValue)18 PCollectionView (org.apache.beam.sdk.values.PCollectionView)18 HashMap (java.util.HashMap)17 Coder (org.apache.beam.sdk.coders.Coder)17 List (java.util.List)16 Map (java.util.Map)14 ValueState (org.apache.beam.sdk.state.ValueState)14 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)13