Search in sources :

Example 51 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class MetricsContainerImplTest method testMonitoringInfosArePopulatedForABeamCounter.

/**
 * Increments a counter registered under the ELEMENT_COUNT URN (labelled with a PCOLLECTION id)
 * and checks that the container reports exactly one MonitoringInfo carrying the same URN, label
 * and int64 sum value.
 */
@Test
public void testMonitoringInfosArePopulatedForABeamCounter() {
    MetricsContainerImpl testObject = new MetricsContainerImpl("step1");

    // Register the counter under a URN + label based metric name and bump it by two.
    HashMap<String, String> counterLabels = new HashMap<>();
    counterLabels.put(MonitoringInfoConstants.Labels.PCOLLECTION, "pcollection");
    MetricName counterName = MonitoringInfoMetricName.named(MonitoringInfoConstants.Urns.ELEMENT_COUNT, counterLabels);
    testObject.getCounter(counterName).inc(2L);

    // The MonitoringInfo we expect the container to report for that counter.
    SimpleMonitoringInfoBuilder expectedInfo = new SimpleMonitoringInfoBuilder();
    expectedInfo.setUrn(MonitoringInfoConstants.Urns.ELEMENT_COUNT);
    expectedInfo.setLabel(MonitoringInfoConstants.Labels.PCOLLECTION, "pcollection");
    expectedInfo.setInt64SumValue(2);

    // Copy the reported infos into a list so the collection matcher can be applied.
    ArrayList<MonitoringInfo> reportedInfos = new ArrayList<>();
    testObject.getMonitoringInfos().forEach(reportedInfos::add);

    assertThat(reportedInfos, containsInAnyOrder(expectedInfo.build()));
}
Also used : MetricName(org.apache.beam.sdk.metrics.MetricName) MonitoringInfo(org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfo) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 52 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class PortableStateExecutionTest method testExecution.

/**
 * Builds a pipeline whose first ParDo emits a fixed sequence of keyed commands and whose second
 * ("statefulDoFn") ParDo applies each command to two independent value states, then submits the
 * translated pipeline through a Flink job invocation and polls until it reports DONE.
 *
 * <p>Every command is applied to both states, so each expected key/value pair appears twice in
 * the asserted output.
 */
@Test(timeout = 120_000)
public void testExecution() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api").create();
    options.setRunner(CrashingRunner.class);
    FlinkPipelineOptions flinkOptions = options.as(FlinkPipelineOptions.class);
    flinkOptions.setFlinkMaster("[local]");
    flinkOptions.setStreaming(isStreaming);
    flinkOptions.setParallelism(2);
    options.as(PortablePipelineOptions.class).setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
    Pipeline p = Pipeline.create(options);

    // Emits the command stream consumed by the stateful DoFn below.
    DoFn<byte[], KV<String, Integer>> commandFn = new DoFn<byte[], KV<String, Integer>>() {

        @ProcessElement
        public void process(ProcessContext ctx) {
            // A CLEAR_STATE value wipes whatever was stored for the key before it.
            ctx.output(KV.of("clearedState", 1));
            ctx.output(KV.of("clearedState", CLEAR_STATE));
            // Any other value is accumulated into the state of its key.
            ctx.output(KV.of("bla1", 42));
            ctx.output(KV.of("bla", 23));
            ctx.output(KV.of("bla2", 64));
            ctx.output(KV.of("bla", 1));
            ctx.output(KV.of("bla", 1));
            // A WRITE_STATE value makes the stateful DoFn emit the current state of the key.
            ctx.output(KV.of("bla", WRITE_STATE));
            ctx.output(KV.of("bla1", WRITE_STATE));
            ctx.output(KV.of("bla2", WRITE_STATE));
            ctx.output(KV.of("clearedState", WRITE_STATE));
        }
    };

    // Interprets each command against two separate value states.
    DoFn<KV<String, Integer>, KV<String, String>> statefulFn = new DoFn<KV<String, Integer>, KV<String, String>>() {

        @StateId("valueState")
        private final StateSpec<ValueState<Integer>> valueStateSpec = StateSpecs.value(VarIntCoder.of());

        @StateId("valueState2")
        private final StateSpec<ValueState<Integer>> valueStateSpec2 = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void process(ProcessContext ctx, @StateId("valueState") ValueState<Integer> valueState, @StateId("valueState2") ValueState<Integer> valueState2) {
            performStateUpdates(ctx, valueState);
            performStateUpdates(ctx, valueState2);
        }

        // Applies the current element's command (clear / write out / accumulate) to one state cell.
        private void performStateUpdates(ProcessContext ctx, ValueState<Integer> valueState) {
            Integer boxedCommand = ctx.element().getValue();
            if (boxedCommand == null) {
                throw new IllegalStateException();
            }
            final int command = boxedCommand;
            if (command == CLEAR_STATE) {
                valueState.clear();
                return;
            }
            if (command == WRITE_STATE) {
                Integer stored = valueState.read();
                String rendered = (stored != null) ? stored.toString() : "null";
                ctx.output(KV.of(ctx.element().getKey(), rendered));
                return;
            }
            // Accumulate: start from the incoming value when nothing has been stored yet.
            Integer previous = valueState.read();
            int updated = (previous == null) ? command : previous + command;
            valueState.write(updated);
        }
    };

    PCollection<KV<String, String>> output = p.apply(Impulse.create())
        .apply(ParDo.of(commandFn))
        .apply("statefulDoFn", ParDo.of(statefulFn));

    // One set of results per state cell, hence every pair is listed twice.
    PAssert.that(output).containsInAnyOrder(
        KV.of("bla", "25"), KV.of("bla1", "42"), KV.of("bla2", "64"), KV.of("clearedState", "null"),
        KV.of("bla", "25"), KV.of("bla1", "42"), KV.of("bla2", "64"), KV.of("clearedState", "null"));

    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
    FlinkPipelineRunner pipelineRunner = new FlinkPipelineRunner(options.as(FlinkPipelineOptions.class), null, Collections.emptyList());
    JobInvocation jobInvocation = FlinkJobInvoker.create(null)
        .createJobInvocation("id", "none", flinkJobExecutor, pipelineProto, options.as(FlinkPipelineOptions.class), pipelineRunner);
    jobInvocation.start();

    // Poll once per second; the @Test timeout bounds the overall wait.
    while (!JobState.Enum.DONE.equals(jobInvocation.getState())) {
        Thread.sleep(1000);
    }
}
Also used : JobInvocation(org.apache.beam.runners.jobsubmission.JobInvocation) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) StateSpec(org.apache.beam.sdk.state.StateSpec) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) ValueState(org.apache.beam.sdk.state.ValueState) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Test(org.junit.Test)

Example 53 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class PortableExecutionTest method testExecution.

/**
 * Builds a small pipeline that emits three words, maps each to its length, keys every length
 * with "foo" and groups by key; then submits the translated pipeline through a Flink job
 * invocation and polls until it reports DONE.
 */
@Test(timeout = 120_000)
public void testExecution() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api").create();
    options.setRunner(CrashingRunner.class);
    FlinkPipelineOptions flinkOptions = options.as(FlinkPipelineOptions.class);
    flinkOptions.setFlinkMaster("[local]");
    flinkOptions.setStreaming(isStreaming);
    flinkOptions.setParallelism(2);
    options.as(PortablePipelineOptions.class).setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
    Pipeline p = Pipeline.create(options);

    // Source of the three input words.
    DoFn<byte[], String> wordsFn = new DoFn<byte[], String>() {

        @ProcessElement
        public void process(ProcessContext ctxt) {
            ctxt.output("zero");
            ctxt.output("one");
            ctxt.output("two");
        }
    };

    // Maps each word to its character count.
    DoFn<String, Long> lengthFn = new DoFn<String, Long>() {

        @ProcessElement
        public void process(ProcessContext ctxt) {
            String word = ctxt.element();
            ctxt.output((long) word.length());
        }
    };

    PCollection<String> words = p.apply("impulse", Impulse.create()).apply("create", ParDo.of(wordsFn));
    PCollection<Long> lengths = words.apply("len", ParDo.of(lengthFn));
    PCollection<KV<String, Long>> keyed = lengths.apply("addKeys", WithKeys.of("foo")).setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianLongCoder.of()));
    PCollection<KV<String, Iterable<Long>>> result = keyed.apply("gbk", GroupByKey.create());

    // "zero" -> 4, "one" -> 3, "two" -> 3, all grouped under the single key "foo".
    PAssert.that(result).containsInAnyOrder(KV.of("foo", ImmutableList.of(4L, 3L, 3L)));

    // Translate and execute the pipeline.
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
    FlinkPipelineRunner pipelineRunner = new FlinkPipelineRunner(options.as(FlinkPipelineOptions.class), null, Collections.emptyList());
    JobInvocation jobInvocation = FlinkJobInvoker.create(null)
        .createJobInvocation("fakeId", "fakeRetrievalToken", flinkJobExecutor, pipelineProto, options.as(FlinkPipelineOptions.class), pipelineRunner);
    jobInvocation.start();

    // Poll once per second; the @Test timeout bounds the overall wait.
    while (!JobState.Enum.DONE.equals(jobInvocation.getState())) {
        Thread.sleep(1000);
    }
}
Also used : JobInvocation(org.apache.beam.runners.jobsubmission.JobInvocation) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Test(org.junit.Test)

Example 54 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class PortableTimersExecutionTest method testTimerExecution.

/**
 * Feeds a shuffled list of keyed integers into a DoFn that re-emits every element with an offset
 * added, stores the key in state, and sets an event-time timer at the window's max timestamp.
 * The timer callback emits one extra element per key. The translated pipeline is submitted
 * through a Flink job invocation and polled until it reports DONE.
 */
@Test(timeout = 120_000)
public void testTimerExecution() throws Exception {
    FlinkPipelineOptions options = PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api").as(FlinkPipelineOptions.class);
    options.setRunner(CrashingRunner.class);
    options.setFlinkMaster("[local]");
    options.setStreaming(isStreaming);
    options.setParallelism(2);
    options.as(PortablePipelineOptions.class).setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);

    final String timerId = "foo";
    final String stateId = "sizzle";
    final int offset = 5000;
    final int timerOutput = 4093;
    // Enough keys that we exercise interesting code paths
    int numKeys = 50;
    int numDuplicateTimers = 15;

    List<KV<String, Integer>> input = new ArrayList<>();
    List<KV<String, Integer>> expectedOutput = new ArrayList<>();
    for (int keyIndex = 0; keyIndex < numKeys; keyIndex++) {
        String key = Integer.toString(keyIndex);
        // The timer callback contributes exactly one element per key.
        expectedOutput.add(KV.of(key, timerOutput));
        for (int value = 0; value < numDuplicateTimers; value++) {
            // Every input element is echoed back with the offset added.
            input.add(KV.of(key, value));
            expectedOutput.add(KV.of(key, value + offset));
        }
    }
    Collections.shuffle(input);

    // Replays the prepared input list for the single impulse element.
    DoFn<byte[], KV<String, Integer>> inputFn = new DoFn<byte[], KV<String, Integer>>() {

        @ProcessElement
        public void processElement(ProcessContext context) {
            for (KV<String, Integer> element : input) {
                context.output(element);
            }
        }
    };

    // Echoes each element (plus offset), remembers its key in state and arms the timer.
    DoFn<KV<String, Integer>, KV<String, Integer>> testFn = new DoFn<KV<String, Integer>, KV<String, Integer>>() {

        @TimerId(timerId)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @StateId(stateId)
        private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context, @TimerId(timerId) Timer timer, @StateId(stateId) ValueState<String> state, BoundedWindow window) {
            timer.set(window.maxTimestamp());
            String key = context.element().getKey();
            state.write(key);
            context.output(KV.of(key, context.element().getValue() + offset));
        }

        @OnTimer(timerId)
        public void onTimer(@StateId(stateId) ValueState<String> state, OutputReceiver<KV<String, Integer>> r) {
            // The key was written to state by processElement; it must be present when the timer fires.
            String storedKey = Objects.requireNonNull(state.read(), "State must not be null");
            r.output(KV.of(storedKey, timerOutput));
        }
    };

    final Pipeline pipeline = Pipeline.create(options);
    PCollection<KV<String, Integer>> output = pipeline
        .apply("Impulse", Impulse.create())
        .apply("Input", ParDo.of(inputFn))
        .apply("Timers", ParDo.of(testFn));
    PAssert.that(output).containsInAnyOrder(expectedOutput);

    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
    FlinkPipelineRunner pipelineRunner = new FlinkPipelineRunner(options, null, Collections.emptyList());
    JobInvocation jobInvocation = FlinkJobInvoker.create(null)
        .createJobInvocation("id", "none", flinkJobExecutor, pipelineProto, options, pipelineRunner);
    jobInvocation.start();

    // Poll once per second; the @Test timeout bounds the overall wait.
    while (!JobState.Enum.DONE.equals(jobInvocation.getState())) {
        Thread.sleep(1000);
    }
    assertThat(jobInvocation.getState(), is(JobState.Enum.DONE));
}
Also used : ArrayList(java.util.ArrayList) StateSpec(org.apache.beam.sdk.state.StateSpec) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) TimerSpec(org.apache.beam.sdk.state.TimerSpec) JobInvocation(org.apache.beam.runners.jobsubmission.JobInvocation) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) DoFn(org.apache.beam.sdk.transforms.DoFn) ValueState(org.apache.beam.sdk.state.ValueState) Timer(org.apache.beam.sdk.state.Timer) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) Test(org.junit.Test)

Example 55 with PCollection

use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in project beam by apache.

the class DataflowPipelineTranslatorTest method testToMap.

/**
 * Translates a pipeline that consumes a {@code View.asMap()} side input and checks the number of
 * steps in the resulting job.
 */
@Test
public void testToMap() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    Pipeline pipeline = Pipeline.create(options);

    final PCollectionView<Map<String, Integer>> view = pipeline
        .apply("CreateSideInput", Create.of(KV.of("a", 1), KV.of("b", 3)))
        .apply(View.asMap());

    // Pairs each word with the side-input entry keyed by the word's first letter.
    DoFn<String, KV<String, Integer>> lookupFn = new DoFn<String, KV<String, Integer>>() {

        @ProcessElement
        public void processElement(ProcessContext c) {
            String word = c.element();
            Map<String, Integer> byFirstLetter = c.sideInput(view);
            c.output(KV.of(word, byFirstLetter.get(word.substring(0, 1))));
        }
    };

    PCollection<KV<String, Integer>> output = pipeline
        .apply("CreateMainInput", Create.of("apple", "banana", "blackberry"))
        .apply("OutputSideInputs", ParDo.of(lookupFn).withSideInputs(view));
    PAssert.that(output).containsInAnyOrder(KV.of("apple", 1), KV.of("banana", 3), KV.of("blackberry", 3));

    DataflowRunner runner = DataflowRunner.fromOptions(options);
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    runner.replaceV1Transforms(pipeline);
    SdkComponents sdkComponents = createSdkComponents(options);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
    Job job = translator.translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList()).getJob();
    List<Step> steps = job.getSteps();

    // Change-detector assertion: it only guards against the test silently becoming a no-op.
    // Correctness of the translation itself is covered by the ValidatesRunner tests; here we
    // merely confirm that translation completes without crashing.
    assertEquals(24, steps.size());
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) KV(org.apache.beam.sdk.values.KV) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) ByteString(org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString) Step(com.google.api.services.dataflow.model.Step) SdkComponents(org.apache.beam.runners.core.construction.SdkComponents) Pipeline(org.apache.beam.sdk.Pipeline) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Job(com.google.api.services.dataflow.model.Job) ImmutableMap(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap) Map(java.util.Map) Test(org.junit.Test)

Aggregations

RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)45 Test (org.junit.Test)45 Pipeline (org.apache.beam.sdk.Pipeline)25 PTransform (org.apache.beam.model.pipeline.v1.RunnerApi.PTransform)24 PCollection (org.apache.beam.model.pipeline.v1.RunnerApi.PCollection)22 PTransformNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PTransformNode)22 Map (java.util.Map)21 Components (org.apache.beam.model.pipeline.v1.RunnerApi.Components)21 PCollectionNode (org.apache.beam.runners.core.construction.graph.PipelineNode.PCollectionNode)21 ByteString (org.apache.beam.vendor.grpc.v1p43p2.com.google.protobuf.ByteString)17 ArrayList (java.util.ArrayList)16 HashMap (java.util.HashMap)14 Environment (org.apache.beam.model.pipeline.v1.RunnerApi.Environment)13 SdkComponents (org.apache.beam.runners.core.construction.SdkComponents)13 PCollection (org.apache.beam.sdk.values.PCollection)12 Coder (org.apache.beam.sdk.coders.Coder)11 KV (org.apache.beam.sdk.values.KV)11 Collection (java.util.Collection)10 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)10 IOException (java.io.IOException)9