Example 1 with JobInvocation

Use of org.apache.beam.runners.jobsubmission.JobInvocation in project beam by apache.

Class SamzaJobInvoker, method invokeWithExecutor.

@Override
protected JobInvocation invokeWithExecutor(RunnerApi.Pipeline pipeline, Struct options, @Nullable String retrievalToken, ListeningExecutorService executorService) {
    LOG.trace("Parsing pipeline options");
    final SamzaPortablePipelineOptions samzaOptions = PipelineOptionsTranslation.fromProto(options).as(SamzaPortablePipelineOptions.class);
    final PortablePipelineRunner pipelineRunner;
    if (Strings.isNullOrEmpty(samzaOptions.getOutputExecutablePath())) {
        pipelineRunner = new SamzaPipelineRunner(samzaOptions);
    } else {
        /*
         * To support --output_executable_path, which bundles the input pipeline, together with
         * all artifacts required to run it, into a jar that can be executed later.
         */
        pipelineRunner = new PortablePipelineJarCreator(SamzaPipelineRunner.class);
    }
    final String invocationId = String.format("%s_%s", samzaOptions.getJobName(), UUID.randomUUID().toString());
    final JobInfo jobInfo = JobInfo.create(invocationId, samzaOptions.getJobName(), retrievalToken, options);
    return new JobInvocation(jobInfo, executorService, pipeline, pipelineRunner);
}
Also used : PortablePipelineRunner(org.apache.beam.runners.jobsubmission.PortablePipelineRunner) JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) PortablePipelineJarCreator(org.apache.beam.runners.jobsubmission.PortablePipelineJarCreator) JobInvocation(org.apache.beam.runners.jobsubmission.JobInvocation)
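
The JobInvocation returned by invokeWithExecutor is driven by the caller the same way as in the Flink-based examples below: it is started and its state is polled until it reaches a terminal state. The following is a minimal sketch of that pattern, not code from the Beam sources; it assumes the JobInvocation and JobState imports shown elsewhere on this page.

// Minimal sketch (assumed caller code): start the invocation and poll its state
// until it reaches DONE, failing fast if it transitions to FAILED.
static void runToCompletion(JobInvocation jobInvocation) throws InterruptedException {
    jobInvocation.start();
    while (jobInvocation.getState() != JobState.Enum.DONE) {
        if (jobInvocation.getState() == JobState.Enum.FAILED) {
            throw new IllegalStateException("Job invocation failed");
        }
        Thread.sleep(100);
    }
}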

Example 2 with JobInvocation

Use of org.apache.beam.runners.jobsubmission.JobInvocation in project beam by apache.

Class ReadSourcePortableTest, method testExecution.

@Test(timeout = 120_000)
public void testExecution() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.fromArgs("--experiments=use_deprecated_read").create();
    options.setRunner(CrashingRunner.class);
    options.as(FlinkPipelineOptions.class).setFlinkMaster("[local]");
    options.as(FlinkPipelineOptions.class).setStreaming(isStreaming);
    options.as(FlinkPipelineOptions.class).setParallelism(2);
    options.as(PortablePipelineOptions.class).setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
    Pipeline p = Pipeline.create(options);
    PCollection<Long> result = p.apply(Read.from(new Source(10))).apply(Window.into(FixedWindows.of(Duration.millis(1))));
    PAssert.that(result).containsInAnyOrder(ImmutableList.of(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L));
    SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReads(p);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
    List<RunnerApi.PTransform> readTransforms =
        pipelineProto.getComponents().getTransformsMap().values().stream()
            .filter(transform ->
                transform.getSpec().getUrn().equals(PTransformTranslation.READ_TRANSFORM_URN))
            .collect(Collectors.toList());
    assertThat(readTransforms, not(empty()));
    // execute the pipeline
    JobInvocation jobInvocation =
        FlinkJobInvoker.create(null)
            .createJobInvocation(
                "fakeId",
                "fakeRetrievalToken",
                flinkJobExecutor,
                pipelineProto,
                options.as(FlinkPipelineOptions.class),
                new FlinkPipelineRunner(
                    options.as(FlinkPipelineOptions.class), null, Collections.emptyList()));
    jobInvocation.start();
    while (jobInvocation.getState() != JobState.Enum.DONE) {
        assertThat(jobInvocation.getState(), not(JobState.Enum.FAILED));
        Thread.sleep(100);
    }
}
Also used : SerializableCoder(org.apache.beam.sdk.coders.SerializableCoder) BeforeClass(org.junit.BeforeClass) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) UnboundedSource(org.apache.beam.sdk.io.UnboundedSource) Matchers.not(org.hamcrest.Matchers.not) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) LoggerFactory(org.slf4j.LoggerFactory) Coder(org.apache.beam.sdk.coders.Coder) PipelineTranslation(org.apache.beam.runners.core.construction.PipelineTranslation) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) Environments(org.apache.beam.runners.core.construction.Environments) JobInvocation(org.apache.beam.runners.jobsubmission.JobInvocation) Read(org.apache.beam.sdk.io.Read) Window(org.apache.beam.sdk.transforms.windowing.Window) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) Pipeline(org.apache.beam.sdk.Pipeline) NoSuchElementException(java.util.NoSuchElementException) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Nullable(org.checkerframework.checker.nullness.qual.Nullable) Parameterized(org.junit.runners.Parameterized) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) Matchers.empty(org.hamcrest.Matchers.empty) AfterClass(org.junit.AfterClass) PTransformTranslation(org.apache.beam.runners.core.construction.PTransformTranslation) MoreExecutors(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.MoreExecutors) Logger(org.slf4j.Logger) PAssert(org.apache.beam.sdk.testing.PAssert) Parameter(org.junit.runners.Parameterized.Parameter) FixedWindows(org.apache.beam.sdk.transforms.windowing.FixedWindows) SplittableParDo(org.apache.beam.runners.core.construction.SplittableParDo) Test(org.junit.Test) PCollection(org.apache.beam.sdk.values.PCollection) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) Serializable(java.io.Serializable) TimeUnit(java.util.concurrent.TimeUnit) CrashingRunner(org.apache.beam.sdk.testing.CrashingRunner) List(java.util.List) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) ListeningExecutorService(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.util.concurrent.ListeningExecutorService) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Collections(java.util.Collections) JobState(org.apache.beam.model.jobmanagement.v1.JobApi.JobState) JobInvocation(org.apache.beam.runners.jobsubmission.JobInvocation) UnboundedSource(org.apache.beam.sdk.io.UnboundedSource) Pipeline(org.apache.beam.sdk.Pipeline) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) Test(org.junit.Test)
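
The test relies on a shared flinkJobExecutor whose setup is not part of the snippet. The imports listed above (MoreExecutors, Executors, ListeningExecutorService, BeforeClass, AfterClass, TimeUnit) suggest the usual pattern of wrapping a cached thread pool; the following is a plausible sketch only, with field and method names assumed.

// Plausible sketch of the executor setup implied by the imports above;
// the names and the shutdown timeout are assumptions, not the actual test code.
private static ListeningExecutorService flinkJobExecutor;

@BeforeClass
public static void setUpExecutor() {
    flinkJobExecutor = MoreExecutors.listeningDecorator(Executors.newCachedThreadPool());
}

@AfterClass
public static void tearDownExecutor() throws InterruptedException {
    flinkJobExecutor.shutdown();
    flinkJobExecutor.awaitTermination(10, TimeUnit.SECONDS);
    flinkJobExecutor = null;
}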

Example 3 with JobInvocation

Use of org.apache.beam.runners.jobsubmission.JobInvocation in project beam by apache.

Class FlinkSavepointTest, method executePortable.

private JobID executePortable(Pipeline pipeline) throws Exception {
    pipeline.getOptions().as(PortablePipelineOptions.class).setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
    pipeline.getOptions().as(FlinkPipelineOptions.class).setFlinkMaster(getFlinkMaster());
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
    FlinkPipelineOptions pipelineOptions = pipeline.getOptions().as(FlinkPipelineOptions.class);
    JobInvocation jobInvocation =
        FlinkJobInvoker.create(null)
            .createJobInvocation(
                "id",
                "none",
                flinkJobExecutor,
                pipelineProto,
                pipelineOptions,
                new FlinkPipelineRunner(pipelineOptions, null, Collections.emptyList()));
    jobInvocation.start();
    return waitForJobToBeReady(pipeline.getOptions().getJobName());
}
Also used : RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) JobInvocation(org.apache.beam.runners.jobsubmission.JobInvocation)

Example 4 with JobInvocation

Use of org.apache.beam.runners.jobsubmission.JobInvocation in project beam by apache.

Class PortableStateExecutionTest, method testExecution.

@Test(timeout = 120_000)
public void testExecution() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api").create();
    options.setRunner(CrashingRunner.class);
    options.as(FlinkPipelineOptions.class).setFlinkMaster("[local]");
    options.as(FlinkPipelineOptions.class).setStreaming(isStreaming);
    options.as(FlinkPipelineOptions.class).setParallelism(2);
    options.as(PortablePipelineOptions.class).setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, String>> output = p.apply(Impulse.create()).apply(ParDo.of(new DoFn<byte[], KV<String, Integer>>() {

        @ProcessElement
        public void process(ProcessContext ctx) {
            // Values == -1 will clear the state
            ctx.output(KV.of("clearedState", 1));
            ctx.output(KV.of("clearedState", CLEAR_STATE));
            // values >= 1 will be added on top of each other
            ctx.output(KV.of("bla1", 42));
            ctx.output(KV.of("bla", 23));
            ctx.output(KV.of("bla2", 64));
            ctx.output(KV.of("bla", 1));
            ctx.output(KV.of("bla", 1));
            // values == -2 will write the current state to the output
            ctx.output(KV.of("bla", WRITE_STATE));
            ctx.output(KV.of("bla1", WRITE_STATE));
            ctx.output(KV.of("bla2", WRITE_STATE));
            ctx.output(KV.of("clearedState", WRITE_STATE));
        }
    })).apply("statefulDoFn", ParDo.of(new DoFn<KV<String, Integer>, KV<String, String>>() {

        @StateId("valueState")
        private final StateSpec<ValueState<Integer>> valueStateSpec = StateSpecs.value(VarIntCoder.of());

        @StateId("valueState2")
        private final StateSpec<ValueState<Integer>> valueStateSpec2 = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void process(ProcessContext ctx, @StateId("valueState") ValueState<Integer> valueState, @StateId("valueState2") ValueState<Integer> valueState2) {
            performStateUpdates(ctx, valueState);
            performStateUpdates(ctx, valueState2);
        }

        private void performStateUpdates(ProcessContext ctx, ValueState<Integer> valueState) {
            Integer value = ctx.element().getValue();
            if (value == null) {
                throw new IllegalStateException();
            }
            switch(value) {
                case CLEAR_STATE:
                    valueState.clear();
                    break;
                case WRITE_STATE:
                    Integer read = valueState.read();
                    ctx.output(KV.of(ctx.element().getKey(), read == null ? "null" : read.toString()));
                    break;
                default:
                    Integer currentState = valueState.read();
                    if (currentState == null) {
                        currentState = value;
                    } else {
                        currentState += value;
                    }
                    valueState.write(currentState);
            }
        }
    }));
    PAssert.that(output).containsInAnyOrder(KV.of("bla", "25"), KV.of("bla1", "42"), KV.of("bla2", "64"), KV.of("clearedState", "null"), KV.of("bla", "25"), KV.of("bla1", "42"), KV.of("bla2", "64"), KV.of("clearedState", "null"));
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
    JobInvocation jobInvocation =
        FlinkJobInvoker.create(null)
            .createJobInvocation(
                "id",
                "none",
                flinkJobExecutor,
                pipelineProto,
                options.as(FlinkPipelineOptions.class),
                new FlinkPipelineRunner(
                    options.as(FlinkPipelineOptions.class), null, Collections.emptyList()));
    jobInvocation.start();
    while (jobInvocation.getState() != JobState.Enum.DONE) {
        Thread.sleep(1000);
    }
}
Also used : JobInvocation(org.apache.beam.runners.jobsubmission.JobInvocation) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) StateSpec(org.apache.beam.sdk.state.StateSpec) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) ValueState(org.apache.beam.sdk.state.ValueState) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) Test(org.junit.Test)
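
The stateful DoFn above switches on two sentinel constants that the test class defines outside the snippet shown here. From the inline comments (values of -1 clear the state, values of -2 write the current state to the output), they can be assumed to look like this:

// Assumed sentinel constants, reconstructed from the comments in the DoFn above;
// the actual test class declares them outside the snippet shown on this page.
private static final int CLEAR_STATE = -1;
private static final int WRITE_STATE = -2;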

Example 5 with JobInvocation

Use of org.apache.beam.runners.jobsubmission.JobInvocation in project beam by apache.

Class PortableExecutionTest, method testExecution.

@Test(timeout = 120_000)
public void testExecution() throws Exception {
    PipelineOptions options = PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api").create();
    options.setRunner(CrashingRunner.class);
    options.as(FlinkPipelineOptions.class).setFlinkMaster("[local]");
    options.as(FlinkPipelineOptions.class).setStreaming(isStreaming);
    options.as(FlinkPipelineOptions.class).setParallelism(2);
    options.as(PortablePipelineOptions.class).setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, Iterable<Long>>> result = p.apply("impulse", Impulse.create()).apply("create", ParDo.of(new DoFn<byte[], String>() {

        @ProcessElement
        public void process(ProcessContext ctxt) {
            ctxt.output("zero");
            ctxt.output("one");
            ctxt.output("two");
        }
    })).apply("len", ParDo.of(new DoFn<String, Long>() {

        @ProcessElement
        public void process(ProcessContext ctxt) {
            ctxt.output((long) ctxt.element().length());
        }
    })).apply("addKeys", WithKeys.of("foo")).setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianLongCoder.of())).apply("gbk", GroupByKey.create());
    PAssert.that(result).containsInAnyOrder(KV.of("foo", ImmutableList.of(4L, 3L, 3L)));
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
    // execute the pipeline
    JobInvocation jobInvocation =
        FlinkJobInvoker.create(null)
            .createJobInvocation(
                "fakeId",
                "fakeRetrievalToken",
                flinkJobExecutor,
                pipelineProto,
                options.as(FlinkPipelineOptions.class),
                new FlinkPipelineRunner(
                    options.as(FlinkPipelineOptions.class), null, Collections.emptyList()));
    jobInvocation.start();
    while (jobInvocation.getState() != JobState.Enum.DONE) {
        Thread.sleep(1000);
    }
}
Also used : JobInvocation(org.apache.beam.runners.jobsubmission.JobInvocation) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) DoFn(org.apache.beam.sdk.transforms.DoFn) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) Test(org.junit.Test)
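
For reference, the expected result in the PAssert follows directly from the string lengths produced by the "create" and "len" steps. A short standalone trace, not part of the test, assuming the usual java.util.stream imports:

// Standalone sketch of why the grouped values are 4, 3 and 3: "create" emits
// "zero", "one", "two"; "len" maps each string to its length; "addKeys" attaches the
// key "foo"; "gbk" groups them into a single KV("foo", [4, 3, 3]) element.
List<Long> lengths =
    Stream.of("zero", "one", "two").map(s -> (long) s.length()).collect(Collectors.toList());
// lengths == [4, 3, 3], matching KV.of("foo", ImmutableList.of(4L, 3L, 3L)) above.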

Aggregations

JobInvocation (org.apache.beam.runners.jobsubmission.JobInvocation): 7
RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi): 5
PortablePipelineOptions (org.apache.beam.sdk.options.PortablePipelineOptions): 5
Pipeline (org.apache.beam.sdk.Pipeline): 4
PipelineOptions (org.apache.beam.sdk.options.PipelineOptions): 3
DoFn (org.apache.beam.sdk.transforms.DoFn): 3
KV (org.apache.beam.sdk.values.KV): 3
Test (org.junit.Test): 3
JobInfo (org.apache.beam.runners.fnexecution.provisioning.JobInfo): 2
PortablePipelineJarCreator (org.apache.beam.runners.jobsubmission.PortablePipelineJarCreator): 2
PortablePipelineRunner (org.apache.beam.runners.jobsubmission.PortablePipelineRunner): 2
StateSpec (org.apache.beam.sdk.state.StateSpec): 2
ValueState (org.apache.beam.sdk.state.ValueState): 2
BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 2
Serializable (java.io.Serializable): 1
ArrayList (java.util.ArrayList): 1
Collections (java.util.Collections): 1
List (java.util.List): 1
NoSuchElementException (java.util.NoSuchElementException): 1
Executors (java.util.concurrent.Executors): 1