Example use of org.apache.beam.runners.jobsubmission.JobInvocation in the Apache Beam project.
From the class PortableTimersExecutionTest, method testTimerExecution.
/**
 * Submits a portable pipeline containing a stateful {@code DoFn} with an event-time timer through
 * a {@link JobInvocation} and waits for the job to finish.
 *
 * <p>For each of {@code numKeys} keys the input holds {@code numDuplicateTimers} values. The
 * stateful {@code DoFn} re-emits every value with {@code offset} added, stores the key in state
 * and sets the timer to the window's maximum timestamp. When the timer fires it emits one extra
 * {@code (key, timerOutput)} element built from the stored key. {@code PAssert} checks the union
 * of both kinds of output.
 *
 * @throws Exception if interrupted while polling, or if pipeline translation/submission fails
 */
@Test(timeout = 120_000)
public void testTimerExecution() throws Exception {
  FlinkPipelineOptions options =
      PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api").as(FlinkPipelineOptions.class);
  options.setRunner(CrashingRunner.class);
  options.setFlinkMaster("[local]");
  options.setStreaming(isStreaming);
  options.setParallelism(2);
  options
      .as(PortablePipelineOptions.class)
      .setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);

  // These stay final locals initialised with literals: timerId and stateId are used as
  // annotation values below and therefore have to be compile-time constants.
  final String timerId = "foo";
  final String stateId = "sizzle";
  final int offset = 5000;
  final int timerOutput = 4093;

  // Enough keys that we exercise interesting code paths
  int numKeys = 50;
  int numDuplicateTimers = 15;
  List<KV<String, Integer>> input = new ArrayList<>();
  List<KV<String, Integer>> expectedOutput = new ArrayList<>();
  for (int key = 0; key < numKeys; ++key) {
    String keyName = Integer.toString(key);
    // Each key should have just one final output at GC time
    expectedOutput.add(KV.of(keyName, timerOutput));
    for (int i = 0; i < numDuplicateTimers; ++i) {
      // Each input should be output with the offset added
      input.add(KV.of(keyName, i));
      expectedOutput.add(KV.of(keyName, i + offset));
    }
  }
  // Randomise the arrival order so elements of one key are interleaved with other keys.
  Collections.shuffle(input);

  // Emits the whole prepared input list for every element it receives (here: the impulse).
  DoFn<byte[], KV<String, Integer>> inputFn =
      new DoFn<byte[], KV<String, Integer>>() {
        @ProcessElement
        public void processElement(ProcessContext context) {
          for (KV<String, Integer> element : input) {
            context.output(element);
          }
        }
      };

  DoFn<KV<String, Integer>, KV<String, Integer>> testFn =
      new DoFn<KV<String, Integer>, KV<String, Integer>>() {
        @TimerId(timerId)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @StateId(stateId)
        private final StateSpec<ValueState<String>> stateSpec =
            StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(
            ProcessContext context,
            @TimerId(timerId) Timer timer,
            @StateId(stateId) ValueState<String> state,
            BoundedWindow window) {
          // Every element of a key sets the same timer again and overwrites the stored key.
          timer.set(window.maxTimestamp());
          state.write(context.element().getKey());
          context.output(
              KV.of(context.element().getKey(), context.element().getValue() + offset));
        }

        @OnTimer(timerId)
        public void onTimer(
            @StateId(stateId) ValueState<String> state, OutputReceiver<KV<String, Integer>> r) {
          // The key is recovered from state, so a missing value surfaces as an explicit failure.
          String read = Objects.requireNonNull(state.read(), "State must not be null");
          r.output(KV.of(read, timerOutput));
        }
      };

  final Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<String, Integer>> output =
      pipeline
          .apply("Impulse", Impulse.create())
          .apply("Input", ParDo.of(inputFn))
          .apply("Timers", ParDo.of(testFn));
  PAssert.that(output).containsInAnyOrder(expectedOutput);

  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
  JobInvocation jobInvocation =
      FlinkJobInvoker.create(null)
          .createJobInvocation(
              "id",
              "none",
              flinkJobExecutor,
              pipelineProto,
              options,
              new FlinkPipelineRunner(options, null, Collections.emptyList()));
  jobInvocation.start();

  // Poll until the job reaches DONE, FAILED or CANCELLED. Stopping on FAILED and CANCELLED as
  // well lets a broken job fail the assertion below immediately with its real final state,
  // instead of spinning here until the JUnit timeout expires.
  JobState.Enum state = jobInvocation.getState();
  while (state != JobState.Enum.DONE
      && state != JobState.Enum.FAILED
      && state != JobState.Enum.CANCELLED) {
    Thread.sleep(1000);
    state = jobInvocation.getState();
  }
  assertThat(state, is(JobState.Enum.DONE));
}
Example use of org.apache.beam.runners.jobsubmission.JobInvocation in the Apache Beam project.
From the class SparkJobInvoker, method createJobInvocation.
/**
 * Builds a {@link JobInvocation} for the given pipeline and Spark options.
 *
 * <p>If the options' {@code PortablePipelineOptions} view has a null or empty output executable
 * path, the invocation is given a {@code SparkPipelineRunner} built from {@code sparkOptions}.
 * Otherwise it is given a {@code PortablePipelineJarCreator} constructed with
 * {@code SparkPipelineRunner.class}.
 *
 * @param invocationId id passed to {@code JobInfo.create}
 * @param retrievalToken retrieval token passed to {@code JobInfo.create}
 * @param executorService executor handed to the new invocation
 * @param pipeline pipeline handed to the new invocation
 * @param sparkOptions source of the job name, the serialized options and the runner choice
 * @return the newly constructed invocation
 */
static JobInvocation createJobInvocation(
    String invocationId,
    String retrievalToken,
    ListeningExecutorService executorService,
    Pipeline pipeline,
    SparkPipelineOptions sparkOptions) {
  final JobInfo info =
      JobInfo.create(
          invocationId,
          sparkOptions.getJobName(),
          retrievalToken,
          PipelineOptionsTranslation.toProto(sparkOptions));

  final String executablePath =
      sparkOptions.as(PortablePipelineOptions.class).getOutputExecutablePath();
  final boolean noExecutableRequested = Strings.isNullOrEmpty(executablePath);

  final PortablePipelineRunner runner =
      noExecutableRequested
          ? new SparkPipelineRunner(sparkOptions)
          : new PortablePipelineJarCreator(SparkPipelineRunner.class);

  return new JobInvocation(info, executorService, pipeline, runner);
}
Aggregations