Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.
From the class DataflowPipelineTranslatorTest, method testStreamingSplittableParDoTranslation.
/**
* Smoke test to fail fast if translation of a splittable ParDo in streaming breaks.
*/
@Test
public void testStreamingSplittableParDoTranslation() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(true);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  PCollection<String> windowedInput =
      pipeline
          .apply(Create.of("a"))
          .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))));
  windowedInput.apply(ParDo.of(new TestSplittableFn()));

  runner.replaceV1Transforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();

  // The job should contain a SplittableParDo.ProcessKeyedElements step, translated as
  // "SplittableProcessKeyed".
  List<Step> steps = job.getSteps();
  Step processKeyedStep = null;
  for (Step step : steps) {
    if ("SplittableProcessKeyed".equals(step.getKind())) {
      assertNull(processKeyedStep);
      processKeyedStep = step;
    }
  }
  assertNotNull(processKeyedStep);

  @SuppressWarnings({"unchecked", "rawtypes"})
  DoFnInfo<String, Integer> fnInfo =
      (DoFnInfo<String, Integer>)
          SerializableUtils.deserializeFromByteArray(
              jsonStringToByteArray(
                  getString(processKeyedStep.getProperties(), PropertyNames.SERIALIZED_FN)),
              "DoFnInfo");
  assertThat(fnInfo.getDoFn(), instanceOf(TestSplittableFn.class));
  assertThat(
      fnInfo.getWindowingStrategy().getWindowFn(),
      Matchers.<WindowFn>equalTo(FixedWindows.of(Duration.standardMinutes(1))));
  assertThat(fnInfo.getInputCoder(), instanceOf(StringUtf8Coder.class));

  Coder<?> restrictionCoder =
      CloudObjects.coderFromCloudObject(
          (CloudObject)
              Structs.getObject(
                  processKeyedStep.getProperties(), PropertyNames.RESTRICTION_CODER));
  assertEquals(
      KvCoder.of(SerializableCoder.of(OffsetRange.class), VoidCoder.of()), restrictionCoder);
}
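TestSplittableFn is referenced above but not shown on this page. Below is a minimal sketch, assuming a String-to-Integer splittable DoFn with an OffsetRange restriction (consistent with the StringUtf8Coder and SerializableCoder.of(OffsetRange.class) assertions in the test); the actual definition in the Beam test suite may differ.

// Hypothetical sketch of TestSplittableFn: the processing body is a noop
// because these tests exercise translation, not execution.
private static class TestSplittableFn extends DoFn<String, Integer> {
  @ProcessElement
  public void process(ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) {
    // noop
  }

  @GetInitialRestriction
  public OffsetRange getInitialRestriction(@Element String element) {
    // One position per character of the input element.
    return new OffsetRange(0, element.length());
  }
}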
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.
From the class DataflowPipelineTranslatorTest, method testToList.
/**
* Testing just the translation of the pipeline from ViewTest#testToList.
*/
@Test
public void testToList() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline pipeline = Pipeline.create(options);

  final PCollectionView<List<Integer>> view =
      pipeline.apply("CreateSideInput", Create.of(11, 13, 17, 23)).apply(View.asList());
  pipeline
      .apply("CreateMainInput", Create.of(29, 31))
      .apply(
          "OutputSideInputs",
          ParDo.of(
                  new DoFn<Integer, Integer>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      checkArgument(c.sideInput(view).size() == 4);
                      checkArgument(c.sideInput(view).get(0).equals(c.sideInput(view).get(0)));
                      for (Integer i : c.sideInput(view)) {
                        c.output(i);
                      }
                    }
                  })
              .withSideInputs(view));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  runner.replaceV1Transforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();
  List<Step> steps = job.getSteps();

  // Change detector assertion just to make sure the test was not a noop.
  // No need to actually check the pipeline as the ValidatesRunner tests
  // ensure translation is correct. This is just a quick check to see that translation
  // does not crash.
  assertEquals(5, steps.size());
}
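Every test on this page starts from the buildPipelineOptions() helper, which is not shown here. A plausible reconstruction, assuming it only fills in the minimum options that DataflowRunner.fromOptions requires (the real helper in the Beam test class additionally stubs out the Dataflow and GCS clients with mocks):

// Hypothetical sketch of buildPipelineOptions(); project, region, and
// tempLocation values are placeholders, not the ones used in Beam's tests.
private static DataflowPipelineOptions buildPipelineOptions() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setProject("some-project");
  options.setRegion("some-region");
  options.setTempLocation("gs://somebucket/some/path");
  options.setFilesToStage(new ArrayList<>()); // skip real file staging
  return options;
}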
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.
From the class DataflowPipelineTranslatorTest, method testSetWorkerHarnessContainerImageInPipelineProto.
/**
* Tests that when (deprecated) {@link
* DataflowPipelineOptions#setWorkerHarnessContainerImage(String)} pipeline option is set, {@link
* DataflowRunner} sets that value as the {@link DockerPayload#getContainerImage()} of the default
* {@link Environment} used when generating the model pipeline proto.
*/
@Test
public void testSetWorkerHarnessContainerImageInPipelineProto() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  String containerImage = "gcr.io/image:foo";
  options.as(DataflowPipelineOptions.class).setWorkerHarnessContainerImage(containerImage);

  Pipeline p = Pipeline.create(options);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline proto = PipelineTranslation.toProto(p, sdkComponents, true);
  JobSpecification specification =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              proto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList());
  RunnerApi.Pipeline pipelineProto = specification.getPipelineProto();

  assertEquals(1, pipelineProto.getComponents().getEnvironmentsCount());
  Environment defaultEnvironment =
      Iterables.getOnlyElement(pipelineProto.getComponents().getEnvironmentsMap().values());
  DockerPayload payload = DockerPayload.parseFrom(defaultEnvironment.getPayload());
  assertEquals(DataflowRunner.getContainerImageForJob(options), payload.getContainerImage());
}
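The createSdkComponents(options) helper used throughout these tests is what registers the single default environment that this test inspects. A sketch of its likely shape, assuming the Environments factory from the same org.apache.beam.runners.core.construction package; treat the exact body as an approximation:

// Likely shape of createSdkComponents(options): register one default Docker
// environment whose container image is derived from the pipeline options.
private static SdkComponents createSdkComponents(PipelineOptions options) {
  SdkComponents sdkComponents = SdkComponents.create();
  String containerImageURL =
      DataflowRunner.getContainerImageForJob(options.as(DataflowPipelineOptions.class));
  RunnerApi.Environment defaultEnvironment =
      Environments.createDockerEnvironment(containerImageURL);
  sdkComponents.registerEnvironment(defaultEnvironment);
  return sdkComponents;
}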
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.
From the class DataflowPipelineTranslatorTest, method testBatchStatefulParDoTranslation.
/**
* Smoke test to fail fast if translation of a stateful ParDo in batch breaks.
*/
@Test
public void testBatchStatefulParDoTranslation() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {};
  pipeline
      .apply(Create.of(KV.of(1, 1)))
      .apply(
          ParDo.of(
                  new DoFn<KV<Integer, Integer>, Integer>() {
                    @StateId("unused")
                    final StateSpec<ValueState<Integer>> stateSpec =
                        StateSpecs.value(VarIntCoder.of());

                    @ProcessElement
                    public void process(ProcessContext c) {
                      // noop
                    }
                  })
              .withOutputTags(mainOutputTag, TupleTagList.empty()));

  runner.replaceV1Transforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();

  // The job should look like:
  // 0. ParallelRead (Create)
  // 1. ParDo(ReifyWVs)
  // 2. GroupByKeyAndSortValuesOnly
  // 3. A ParDo over grouped and sorted KVs that is executed via ungrouping service-side
  List<Step> steps = job.getSteps();
  assertEquals(4, steps.size());

  Step createStep = steps.get(0);
  assertEquals("ParallelRead", createStep.getKind());

  Step reifyWindowedValueStep = steps.get(1);
  assertEquals("ParallelDo", reifyWindowedValueStep.getKind());

  Step gbkStep = steps.get(2);
  assertEquals("GroupByKey", gbkStep.getKind());

  Step statefulParDoStep = steps.get(3);
  assertEquals("ParallelDo", statefulParDoStep.getKind());
  assertThat(
      (String) statefulParDoStep.getProperties().get(PropertyNames.USES_KEYED_STATE),
      not(equalTo("true")));
}
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.
From the class DataflowPipelineTranslatorTest, method testStepResourceHints.
@Test
public void testStepResourceHints() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply(Create.of(1, 2, 3))
      .apply(
          "Has hints",
          MapElements.into(TypeDescriptors.integers())
              .via((Integer x) -> x + 1)
              .setResourceHints(
                  ResourceHints.create()
                      .withMinRam("10.0GiB")
                      .withAccelerator("type:nvidia-tesla-k80;count:1;install-nvidia-driver")));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceV1Transforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();

  Step stepWithHints = job.getSteps().get(1);
  ImmutableMap<String, Object> expectedHints =
      ImmutableMap.<String, Object>builder()
          .put("beam:resources:min_ram_bytes:v1", "10737418240")
          .put(
              "beam:resources:accelerator:v1",
              "type:nvidia-tesla-k80;count:1;install-nvidia-driver")
          .build();
  assertEquals(expectedHints, stepWithHints.getProperties().get("resource_hints"));
}
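The expected "beam:resources:min_ram_bytes:v1" value follows from GiB being a binary unit: 10.0 GiB is 10 × 1024³ bytes. A quick check of the arithmetic:

// "10.0GiB" expressed in bytes using binary units (2^30 bytes per GiB).
long tenGiBInBytes = 10L * 1024 * 1024 * 1024; // 10737418240, as asserted above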