Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache: class DataflowPipelineTranslatorTest, method testToList.
/** Tests just the translation of the pipeline from ViewTest#testToList. */
@Test
public void testToList() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline pipeline = Pipeline.create(options);
  final PCollectionView<List<Integer>> view =
      pipeline.apply("CreateSideInput", Create.of(11, 13, 17, 23)).apply(View.asList());
  pipeline
      .apply("CreateMainInput", Create.of(29, 31))
      .apply(
          "OutputSideInputs",
          ParDo.of(
                  new DoFn<Integer, Integer>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      checkArgument(c.sideInput(view).size() == 4);
                      // Indexed access into the list view should succeed and be
                      // consistent across repeated reads.
                      checkArgument(c.sideInput(view).get(0).equals(c.sideInput(view).get(0)));
                      for (Integer i : c.sideInput(view)) {
                        c.output(i);
                      }
                    }
                  })
              .withSideInputs(view));
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  runner.replaceV1Transforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();
  List<Step> steps = job.getSteps();
  // Change detector assertion just to make sure the test was not a noop.
  // No need to actually check the pipeline as the ValidatesRunner tests
  // ensure translation is correct. This is just a quick check to see that translation
  // does not crash.
  assertEquals(5, steps.size());
}
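All of the examples on this page call the test fixture helpers buildPipelineOptions() and createSdkComponents(), which are not shown here. A minimal sketch of what they might look like, assuming mock credentials and placeholder GCP settings (the specific values, and the use of TestCredential and Environments.createDockerEnvironment, are assumptions for illustration):

// Hypothetical reconstruction of the test fixtures; names and values are
// illustrative, not necessarily the exact ones from DataflowPipelineTranslatorTest.
private static DataflowPipelineOptions buildPipelineOptions() throws IOException {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setProject("some-project");                   // assumed placeholder project
  options.setRegion("some-region");                     // assumed placeholder region
  options.setTempLocation("gs://somebucket/some/path"); // assumed staging path
  options.setFilesToStage(new ArrayList<>());           // avoid classpath scanning in tests
  options.setGcpCredential(new TestCredential());       // no real GCP auth in unit tests
  return options;
}

private static SdkComponents createSdkComponents(PipelineOptions options) {
  SdkComponents sdkComponents = SdkComponents.create();
  // Register the default Docker environment so the translated proto carries
  // exactly one environment, keyed to the job's container image.
  String containerImage =
      DataflowRunner.getContainerImageForJob(options.as(DataflowPipelineOptions.class));
  sdkComponents.registerEnvironment(Environments.createDockerEnvironment(containerImage));
  return sdkComponents;
}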
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache: class DataflowPipelineTranslatorTest, method testSetWorkerHarnessContainerImageInPipelineProto.
/**
 * Tests that when the (deprecated) {@link
 * DataflowPipelineOptions#setWorkerHarnessContainerImage(String)} pipeline option is set, {@link
 * DataflowRunner} sets that value as the {@link DockerPayload#getContainerImage()} of the default
 * {@link Environment} used when generating the model pipeline proto.
 */
@Test
public void testSetWorkerHarnessContainerImageInPipelineProto() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  String containerImage = "gcr.io/image:foo";
  options.as(DataflowPipelineOptions.class).setWorkerHarnessContainerImage(containerImage);
  Pipeline p = Pipeline.create(options);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline proto = PipelineTranslation.toProto(p, sdkComponents, true);
  JobSpecification specification =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              proto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList());
  RunnerApi.Pipeline pipelineProto = specification.getPipelineProto();
  assertEquals(1, pipelineProto.getComponents().getEnvironmentsCount());
  Environment defaultEnvironment =
      Iterables.getOnlyElement(pipelineProto.getComponents().getEnvironmentsMap().values());
  DockerPayload payload = DockerPayload.parseFrom(defaultEnvironment.getPayload());
  assertEquals(DataflowRunner.getContainerImageForJob(options), payload.getContainerImage());
}
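The setter under test is deprecated. A brief usage sketch contrasting it with what is, to my understanding, its replacement on DataflowPipelineWorkerPoolOptions (verify the exact option name against your Beam version):

DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
// Deprecated spelling, still honored by the translator, as the test above shows:
options.setWorkerHarnessContainerImage("gcr.io/image:foo");
// Assumed replacement option (sdkContainerImage); both ultimately feed the
// default environment's DockerPayload container image:
options.setSdkContainerImage("gcr.io/image:foo");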
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache: class DataflowPipelineTranslatorTest, method testBatchStatefulParDoTranslation.
/** Smoke test to fail fast if translation of a stateful ParDo in batch breaks. */
@Test
public void testBatchStatefulParDoTranslation() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline pipeline = Pipeline.create(options);
  TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {};
  pipeline
      .apply(Create.of(KV.of(1, 1)))
      .apply(
          ParDo.of(
                  new DoFn<KV<Integer, Integer>, Integer>() {
                    @StateId("unused")
                    final StateSpec<ValueState<Integer>> stateSpec =
                        StateSpecs.value(VarIntCoder.of());

                    @ProcessElement
                    public void process(ProcessContext c) {
                      // noop
                    }
                  })
              .withOutputTags(mainOutputTag, TupleTagList.empty()));
  runner.replaceV1Transforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();
  // The job should look like:
  // 0. ParallelRead (Create)
  // 1. ParDo(ReifyWVs)
  // 2. GroupByKeyAndSortValuesOnly
  // 3. A ParDo over grouped and sorted KVs that is executed via ungrouping service-side
  List<Step> steps = job.getSteps();
  assertEquals(4, steps.size());
  Step createStep = steps.get(0);
  assertEquals("ParallelRead", createStep.getKind());
  Step reifyWindowedValueStep = steps.get(1);
  assertEquals("ParallelDo", reifyWindowedValueStep.getKind());
  Step gbkStep = steps.get(2);
  assertEquals("GroupByKey", gbkStep.getKind());
  Step statefulParDoStep = steps.get(3);
  assertEquals("ParallelDo", statefulParDoStep.getKind());
  // In batch, the stateful ParDo must not be marked as using keyed state.
  assertThat(
      (String) statefulParDoStep.getProperties().get(PropertyNames.USES_KEYED_STATE),
      not(equalTo("true")));
}
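For contrast with the batch assertion above, the streaming translation path is expected to mark stateful steps as using keyed state. A hedged sketch of that counterpart check, assuming the same translation flow run with options.setStreaming(true) (that path is not shown on this page):

// Hypothetical streaming-mode counterpart; assumes translation with
// options.setStreaming(true), where the translator is expected to set
// USES_KEYED_STATE on the stateful ParDo step.
assertEquals(
    "true", statefulParDoStep.getProperties().get(PropertyNames.USES_KEYED_STATE));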
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache: class DataflowPipelineTranslatorTest, method testStepResourceHints.
@Test
public void testStepResourceHints() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply(Create.of(1, 2, 3))
      .apply(
          "Has hints",
          MapElements.into(TypeDescriptors.integers())
              .via((Integer x) -> x + 1)
              .setResourceHints(
                  ResourceHints.create()
                      .withMinRam("10.0GiB")
                      .withAccelerator("type:nvidia-tesla-k80;count:1;install-nvidia-driver")));
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceV1Transforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();
  Step stepWithHints = job.getSteps().get(1);
  ImmutableMap<String, Object> expectedHints =
      ImmutableMap.<String, Object>builder()
          .put("beam:resources:min_ram_bytes:v1", "10737418240")
          .put(
              "beam:resources:accelerator:v1",
              "type:nvidia-tesla-k80;count:1;install-nvidia-driver")
          .build();
  assertEquals(expectedHints, stepWithHints.getProperties().get("resource_hints"));
}
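The expected min_ram_bytes string follows from GiB being a binary unit: 10.0 GiB is 10 * 2^30 bytes, as the quick check below illustrates.

// 10.0 GiB in bytes; matches the "10737418240" string asserted above.
long minRamBytes = 10L * 1024 * 1024 * 1024; // 10737418240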
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache: class DataflowPipelineTranslatorTest, method testMaxNumWorkersIsPassedWhenNoAlgorithmIsSet.
@Test
public void testMaxNumWorkersIsPassedWhenNoAlgorithmIsSet() throws IOException {
  final DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType noScaling = null;
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setMaxNumWorkers(42);
  options.setAutoscalingAlgorithm(noScaling);
  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p,
              pipelineProto,
              sdkComponents,
              DataflowRunner.fromOptions(options),
              Collections.emptyList())
          .getJob();
  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertNull(
      job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getAlgorithm());
  assertEquals(
      42,
      job.getEnvironment()
          .getWorkerPools()
          .get(0)
          .getAutoscalingSettings()
          .getMaxNumWorkers()
          .intValue());
}
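Unlike the other examples, this one builds its pipeline through a buildPipeline(options) helper that is not shown on this page. A minimal sketch of such a helper, assuming a trivial read/write pipeline (the TextIO transforms and the gs:// paths are illustrative):

// Hypothetical reconstruction; any pipeline with at least one transform would do,
// since the test only inspects the worker pool settings of the translated Job.
private static Pipeline buildPipeline(DataflowPipelineOptions options) {
  options.setRunner(DataflowRunner.class);
  Pipeline p = Pipeline.create(options);
  p.apply("ReadMyFile", TextIO.read().from("gs://bucket/object"))
      .apply("WriteMyFile", TextIO.write().to("gs://bucket/object"));
  return p;
}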