Search in sources :

Example 6 with JobInvocation

use of org.apache.beam.runners.jobsubmission.JobInvocation in project beam by apache.

the class PortableTimersExecutionTest method testTimerExecution.

/**
 * Runs a stateful, timer-using {@link DoFn} through a {@link JobInvocation} built by
 * {@link FlinkJobInvoker} (Flink master {@code "[local]"}, embedded default environment) and
 * checks that the job finishes in state {@code DONE}.
 *
 * <p>For every key the pipeline is expected to emit each input value shifted by {@code offset},
 * plus exactly one {@code timerOutput} element produced when the event-time timer, which is set
 * to the end of the window, fires. The in-pipeline {@link PAssert} verifies those contents.
 *
 * @throws Exception if building or starting the job fails, or the polling sleep is interrupted
 */
@Test(timeout = 120_000)
public void testTimerExecution() throws Exception {
    FlinkPipelineOptions options = PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api").as(FlinkPipelineOptions.class);
    options.setRunner(CrashingRunner.class);
    options.setFlinkMaster("[local]");
    options.setStreaming(isStreaming);
    options.setParallelism(2);
    options.as(PortablePipelineOptions.class).setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
    final String timerId = "foo";
    final String stateId = "sizzle";
    final int offset = 5000;
    final int timerOutput = 4093;
    // Enough keys that we exercise interesting code paths
    int numKeys = 50;
    int numDuplicateTimers = 15;
    List<KV<String, Integer>> input = new ArrayList<>();
    List<KV<String, Integer>> expectedOutput = new ArrayList<>();
    // A primitive counter avoids unboxing/reboxing an Integer on every comparison and increment.
    for (int key = 0; key < numKeys; ++key) {
        String keyName = String.valueOf(key);
        // Each key should have just one final output at GC time
        expectedOutput.add(KV.of(keyName, timerOutput));
        for (int i = 0; i < numDuplicateTimers; ++i) {
            // Each input should be output with the offset added
            input.add(KV.of(keyName, i));
            expectedOutput.add(KV.of(keyName, i + offset));
        }
    }
    Collections.shuffle(input);
    // Emits the whole shuffled input list for every element it receives.
    DoFn<byte[], KV<String, Integer>> inputFn = new DoFn<byte[], KV<String, Integer>>() {

        @ProcessElement
        public void processElement(ProcessContext context) {
            for (KV<String, Integer> stringIntegerKV : input) {
                context.output(stringIntegerKV);
            }
        }
    };
    DoFn<KV<String, Integer>, KV<String, Integer>> testFn = new DoFn<KV<String, Integer>, KV<String, Integer>>() {

        @TimerId(timerId)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @StateId(stateId)
        private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(ProcessContext context, @TimerId(timerId) Timer timer, @StateId(stateId) ValueState<String> state, BoundedWindow window) {
            // Every element re-sets the same timer to the end of the window and remembers its key,
            // so the timer callback can emit one record carrying that key.
            timer.set(window.maxTimestamp());
            state.write(context.element().getKey());
            context.output(KV.of(context.element().getKey(), context.element().getValue() + offset));
        }

        @OnTimer(timerId)
        public void onTimer(@StateId(stateId) ValueState<String> state, OutputReceiver<KV<String, Integer>> r) {
            String read = Objects.requireNonNull(state.read(), "State must not be null");
            KV<String, Integer> of = KV.of(read, timerOutput);
            r.output(of);
        }
    };
    final Pipeline pipeline = Pipeline.create(options);
    PCollection<KV<String, Integer>> output = pipeline.apply("Impulse", Impulse.create()).apply("Input", ParDo.of(inputFn)).apply("Timers", ParDo.of(testFn));
    PAssert.that(output).containsInAnyOrder(expectedOutput);
    RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
    JobInvocation jobInvocation = FlinkJobInvoker.create(null).createJobInvocation("id", "none", flinkJobExecutor, pipelineProto, options, new FlinkPipelineRunner(options, null, Collections.emptyList()));
    jobInvocation.start();
    // Poll until the job reaches a terminal state. Stopping on FAILED/CANCELLED as well as DONE
    // lets a broken job fail the assertion below right away instead of spinning until the
    // JUnit timeout expires.
    JobState.Enum state = jobInvocation.getState();
    while (state != JobState.Enum.DONE && state != JobState.Enum.FAILED && state != JobState.Enum.CANCELLED) {
        Thread.sleep(1000);
        state = jobInvocation.getState();
    }
    assertThat(state, is(JobState.Enum.DONE));
}
Also used : ArrayList(java.util.ArrayList) StateSpec(org.apache.beam.sdk.state.StateSpec) RunnerApi(org.apache.beam.model.pipeline.v1.RunnerApi) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) TimerSpec(org.apache.beam.sdk.state.TimerSpec) JobInvocation(org.apache.beam.runners.jobsubmission.JobInvocation) KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline) DoFn(org.apache.beam.sdk.transforms.DoFn) ValueState(org.apache.beam.sdk.state.ValueState) Timer(org.apache.beam.sdk.state.Timer) PortablePipelineOptions(org.apache.beam.sdk.options.PortablePipelineOptions) Test(org.junit.Test)

Example 7 with JobInvocation

use of org.apache.beam.runners.jobsubmission.JobInvocation in project beam by apache.

the class SparkJobInvoker method createJobInvocation.

/**
 * Assembles a {@link JobInvocation} for the given pipeline and Spark options.
 *
 * <p>The runner handed to the invocation depends on the output executable path read from the
 * options viewed as {@link PortablePipelineOptions}: when that path is null or empty a
 * {@link SparkPipelineRunner} built from {@code sparkOptions} is used; otherwise a
 * {@link PortablePipelineJarCreator} constructed with {@code SparkPipelineRunner.class} is used.
 *
 * @param invocationId id passed to {@link JobInfo#create}
 * @param retrievalToken retrieval token passed to {@link JobInfo#create}
 * @param executorService executor service handed to the new invocation
 * @param pipeline pipeline handed to the new invocation
 * @param sparkOptions options supplying the job name, the serialized options and the output
 *     executable path
 * @return the newly constructed invocation
 */
static JobInvocation createJobInvocation(String invocationId, String retrievalToken, ListeningExecutorService executorService, Pipeline pipeline, SparkPipelineOptions sparkOptions) {
    final JobInfo info = JobInfo.create(invocationId, sparkOptions.getJobName(), retrievalToken, PipelineOptionsTranslation.toProto(sparkOptions));
    final String executablePath = sparkOptions.as(PortablePipelineOptions.class).getOutputExecutablePath();
    final PortablePipelineRunner runner;
    if (!Strings.isNullOrEmpty(executablePath)) {
        // An output path is configured: use the jar creator instead of the direct runner.
        runner = new PortablePipelineJarCreator(SparkPipelineRunner.class);
    } else {
        runner = new SparkPipelineRunner(sparkOptions);
    }
    return new JobInvocation(info, executorService, pipeline, runner);
}
Also used : PortablePipelineRunner(org.apache.beam.runners.jobsubmission.PortablePipelineRunner) JobInfo(org.apache.beam.runners.fnexecution.provisioning.JobInfo) PortablePipelineJarCreator(org.apache.beam.runners.jobsubmission.PortablePipelineJarCreator) JobInvocation(org.apache.beam.runners.jobsubmission.JobInvocation)

Aggregations

JobInvocation (org.apache.beam.runners.jobsubmission.JobInvocation)7 RunnerApi (org.apache.beam.model.pipeline.v1.RunnerApi)5 PortablePipelineOptions (org.apache.beam.sdk.options.PortablePipelineOptions)5 Pipeline (org.apache.beam.sdk.Pipeline)4 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)3 DoFn (org.apache.beam.sdk.transforms.DoFn)3 KV (org.apache.beam.sdk.values.KV)3 Test (org.junit.Test)3 JobInfo (org.apache.beam.runners.fnexecution.provisioning.JobInfo)2 PortablePipelineJarCreator (org.apache.beam.runners.jobsubmission.PortablePipelineJarCreator)2 PortablePipelineRunner (org.apache.beam.runners.jobsubmission.PortablePipelineRunner)2 StateSpec (org.apache.beam.sdk.state.StateSpec)2 ValueState (org.apache.beam.sdk.state.ValueState)2 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)2 Serializable (java.io.Serializable)1 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 List (java.util.List)1 NoSuchElementException (java.util.NoSuchElementException)1 Executors (java.util.concurrent.Executors)1