Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
From class PortableTimersExecutionTest, method testTimerExecution:
@Test(timeout = 120_000)
public void testTimerExecution() throws Exception {
  FlinkPipelineOptions options =
      PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api").as(FlinkPipelineOptions.class);
  options.setRunner(CrashingRunner.class);
  options.setFlinkMaster("[local]");
  options.setStreaming(isStreaming);
  options.setParallelism(2);
  options
      .as(PortablePipelineOptions.class)
      .setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);

  final String timerId = "foo";
  final String stateId = "sizzle";
  final int offset = 5000;
  final int timerOutput = 4093;
  // Enough keys that we exercise interesting code paths.
  int numKeys = 50;
  int numDuplicateTimers = 15;

  List<KV<String, Integer>> input = new ArrayList<>();
  List<KV<String, Integer>> expectedOutput = new ArrayList<>();
  for (Integer key = 0; key < numKeys; ++key) {
    // Each key should have just one final output at GC time.
    expectedOutput.add(KV.of(key.toString(), timerOutput));
    for (int i = 0; i < numDuplicateTimers; ++i) {
      // Each input should be output with the offset added.
      input.add(KV.of(key.toString(), i));
      expectedOutput.add(KV.of(key.toString(), i + offset));
    }
  }
  Collections.shuffle(input);

  DoFn<byte[], KV<String, Integer>> inputFn =
      new DoFn<byte[], KV<String, Integer>>() {
        @ProcessElement
        public void processElement(ProcessContext context) {
          for (KV<String, Integer> stringIntegerKV : input) {
            context.output(stringIntegerKV);
          }
        }
      };

  DoFn<KV<String, Integer>, KV<String, Integer>> testFn =
      new DoFn<KV<String, Integer>, KV<String, Integer>>() {

        @TimerId(timerId)
        private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);

        @StateId(stateId)
        private final StateSpec<ValueState<String>> stateSpec =
            StateSpecs.value(StringUtf8Coder.of());

        @ProcessElement
        public void processElement(
            ProcessContext context,
            @TimerId(timerId) Timer timer,
            @StateId(stateId) ValueState<String> state,
            BoundedWindow window) {
          timer.set(window.maxTimestamp());
          state.write(context.element().getKey());
          context.output(
              KV.of(context.element().getKey(), context.element().getValue() + offset));
        }

        @OnTimer(timerId)
        public void onTimer(
            @StateId(stateId) ValueState<String> state,
            OutputReceiver<KV<String, Integer>> r) {
          String read = Objects.requireNonNull(state.read(), "State must not be null");
          r.output(KV.of(read, timerOutput));
        }
      };

  final Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<String, Integer>> output =
      pipeline
          .apply("Impulse", Impulse.create())
          .apply("Input", ParDo.of(inputFn))
          .apply("Timers", ParDo.of(testFn));
  PAssert.that(output).containsInAnyOrder(expectedOutput);

  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
  JobInvocation jobInvocation =
      FlinkJobInvoker.create(null)
          .createJobInvocation(
              "id",
              "none",
              flinkJobExecutor,
              pipelineProto,
              options,
              new FlinkPipelineRunner(options, null, Collections.emptyList()));
  jobInvocation.start();
  while (jobInvocation.getState() != JobState.Enum.DONE) {
    Thread.sleep(1000);
  }
  assertThat(jobInvocation.getState(), is(JobState.Enum.DONE));
}
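The step this example is indexed on is PipelineTranslation.toProto(pipeline), which turns the SDK Pipeline into a RunnerApi.Pipeline protobuf. A minimal sketch of inspecting that proto through the generated accessors for the beam_runner_api.proto messages (illustrative only, not part of the test above):

  // Illustrative sketch: walking a translated RunnerApi.Pipeline proto via the
  // standard generated protobuf accessors (not part of the test above).
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
  RunnerApi.Components components = pipelineProto.getComponents();
  for (String rootId : pipelineProto.getRootTransformIdsList()) {
    RunnerApi.PTransform root = components.getTransformsOrThrow(rootId);
    System.out.println(rootId + " -> " + root.getUniqueName());
  }
  System.out.println("coders: " + components.getCodersMap().size());
  System.out.println("environments: " + components.getEnvironmentsMap().size());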
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
From class DataflowPipelineTranslatorTest, method testToMap:
@Test
public void testToMap() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline pipeline = Pipeline.create(options);

  final PCollectionView<Map<String, Integer>> view =
      pipeline
          .apply("CreateSideInput", Create.of(KV.of("a", 1), KV.of("b", 3)))
          .apply(View.asMap());
  PCollection<KV<String, Integer>> output =
      pipeline
          .apply("CreateMainInput", Create.of("apple", "banana", "blackberry"))
          .apply(
              "OutputSideInputs",
              ParDo.of(
                      new DoFn<String, KV<String, Integer>>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          c.output(
                              KV.of(
                                  c.element(),
                                  c.sideInput(view).get(c.element().substring(0, 1))));
                        }
                      })
                  .withSideInputs(view));
  PAssert.that(output)
      .containsInAnyOrder(KV.of("apple", 1), KV.of("banana", 3), KV.of("blackberry", 3));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  runner.replaceV1Transforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();
  List<Step> steps = job.getSteps();
  // Change detector assertion just to make sure the test was not a noop.
  // No need to actually check the pipeline as the ValidatesRunner tests
  // ensure translation is correct. This is just a quick check to see that
  // translation does not crash.
  assertEquals(24, steps.size());
}
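createSdkComponents is a private helper of the test class that is not shown in this excerpt. A minimal sketch of what such a helper can look like, assuming SdkComponents.create() and Environments.createDockerEnvironment(...) from Beam's core-construction module; the hardcoded container image is a placeholder, and the real helper may derive it from the options instead:

  // Hypothetical reconstruction of the createSdkComponents test helper; the
  // actual helper in DataflowPipelineTranslatorTest may differ. The container
  // image string below is a placeholder assumption.
  private static SdkComponents createSdkComponents(PipelineOptions options) {
    SdkComponents sdkComponents = SdkComponents.create();
    RunnerApi.Environment defaultEnvironment =
        Environments.createDockerEnvironment("apache/beam_java_sdk:latest");
    sdkComponents.registerEnvironment(defaultEnvironment);
    return sdkComponents;
  }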
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
From class DataflowPipelineTranslatorTest, method testNumWorkersCannotExceedMaxNumWorkers:
@Test
public void testNumWorkersCannotExceedMaxNumWorkers() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setNumWorkers(43);
  options.setMaxNumWorkers(42);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("numWorkers (43) cannot exceed maxNumWorkers (42).");
  DataflowPipelineTranslator.fromOptions(options)
      .translate(
          p, pipelineProto, sdkComponents, DataflowRunner.fromOptions(options), Collections.emptyList())
      .getJob();
}
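The thrown field used above is the standard JUnit 4 ExpectedException rule; it is declared once on the test class rather than inside the method. For completeness, since the declaration falls outside this excerpt:

  // JUnit 4 rule backing the thrown.expect(...) calls above.
  @Rule public ExpectedException thrown = ExpectedException.none();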
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
From class DataflowPipelineTranslatorTest, method testStepDisplayData:
@Test
public void testStepDisplayData() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline pipeline = Pipeline.create(options);

  DoFn<Integer, Integer> fn1 =
      new DoFn<Integer, Integer>() {
        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
          c.output(c.element());
        }

        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
          builder
              .add(DisplayData.item("foo", "bar"))
              .add(
                  DisplayData.item("foo2", DataflowPipelineTranslatorTest.class)
                      .withLabel("Test Class")
                      .withLinkUrl("http://www.google.com"));
        }
      };
  DoFn<Integer, Integer> fn2 =
      new DoFn<Integer, Integer>() {
        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
          c.output(c.element());
        }

        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
          builder.add(DisplayData.item("foo3", 1234));
        }
      };

  ParDo.SingleOutput<Integer, Integer> parDo1 = ParDo.of(fn1);
  ParDo.SingleOutput<Integer, Integer> parDo2 = ParDo.of(fn2);
  pipeline.apply(Create.of(1, 2, 3)).apply(parDo1).apply(parDo2);

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceV1Transforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator.translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList()).getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(3, steps.size());
  Map<String, Object> parDo1Properties = steps.get(1).getProperties();
  Map<String, Object> parDo2Properties = steps.get(2).getProperties();
  assertThat(parDo1Properties, hasKey("display_data"));

  @SuppressWarnings("unchecked")
  Collection<Map<String, String>> fn1displayData =
      (Collection<Map<String, String>>) parDo1Properties.get("display_data");
  @SuppressWarnings("unchecked")
  Collection<Map<String, String>> fn2displayData =
      (Collection<Map<String, String>>) parDo2Properties.get("display_data");

  ImmutableSet<ImmutableMap<String, Object>> expectedFn1DisplayData =
      ImmutableSet.of(
          ImmutableMap.<String, Object>builder()
              .put("key", "foo").put("type", "STRING").put("value", "bar")
              .put("namespace", fn1.getClass().getName())
              .build(),
          ImmutableMap.<String, Object>builder()
              .put("key", "fn").put("label", "Transform Function").put("type", "JAVA_CLASS")
              .put("value", fn1.getClass().getName())
              .put("shortValue", fn1.getClass().getSimpleName())
              .put("namespace", parDo1.getClass().getName())
              .build(),
          ImmutableMap.<String, Object>builder()
              .put("key", "foo2").put("type", "JAVA_CLASS")
              .put("value", DataflowPipelineTranslatorTest.class.getName())
              .put("shortValue", DataflowPipelineTranslatorTest.class.getSimpleName())
              .put("namespace", fn1.getClass().getName())
              .put("label", "Test Class").put("linkUrl", "http://www.google.com")
              .build());
  ImmutableSet<ImmutableMap<String, Object>> expectedFn2DisplayData =
      ImmutableSet.of(
          ImmutableMap.<String, Object>builder()
              .put("key", "fn").put("label", "Transform Function").put("type", "JAVA_CLASS")
              .put("value", fn2.getClass().getName())
              .put("shortValue", fn2.getClass().getSimpleName())
              .put("namespace", parDo2.getClass().getName())
              .build(),
          ImmutableMap.<String, Object>builder()
              .put("key", "foo3").put("type", "INTEGER").put("value", 1234L)
              .put("namespace", fn2.getClass().getName())
              .build());

  assertEquals(expectedFn1DisplayData, ImmutableSet.copyOf(fn1displayData));
  assertEquals(expectedFn2DisplayData, ImmutableSet.copyOf(fn2displayData));
}
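When debugging expectations like the ones above, a DoFn's display data can also be evaluated directly with the SDK's DisplayData.from, without translating the whole pipeline. A short illustrative sketch, not part of the test:

  // Evaluate display data for a fn directly; each item carries the key, type,
  // value, and namespace that the Dataflow translator later serializes into
  // the step's "display_data" property.
  DisplayData displayData = DisplayData.from(fn1);
  for (DisplayData.Item item : displayData.items()) {
    System.out.println(item.getKey() + " = " + item.getValue());
  }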
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
From class DataflowPipelineTranslatorTest, method testInaccessibleProvider:
@Test
public void testInaccessibleProvider() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline pipeline = Pipeline.create(options);
  DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options);

  pipeline.apply(TextIO.read().from(new TestValueProvider()));

  // Check that translation does not fail.
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  t.translate(
      pipeline, pipelineProto, sdkComponents, DataflowRunner.fromOptions(options), Collections.emptyList());
}
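TestValueProvider is a private helper of the test class that deliberately cannot be read at pipeline-construction time. A minimal sketch of such a provider, assuming only the ValueProvider interface from org.apache.beam.sdk.options; the real helper may differ:

  // Hypothetical stand-in for the test's TestValueProvider: a provider whose
  // value is not accessible during construction, so translation must not
  // attempt to read it.
  private static class TestValueProvider implements ValueProvider<String>, Serializable {
    @Override
    public boolean isAccessible() {
      return false;
    }

    @Override
    public String get() {
      throw new RuntimeException("Should not be called.");
    }
  }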