use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
the class DataflowRunnerTest method testApplySdkEnvironmentOverrides.
@Test
public void testApplySdkEnvironmentOverrides() throws IOException {
  // Image the input pipeline declares, and the image the override is expected to substitute.
  String originalImage = "apache/beam_python3.8_sdk:latest";
  String overrideImage = "gcr.io/apache-beam-testing/beam-sdk/beam_python3.8_sdk:latest";

  // The override is given as "<regex>,<replacement image>".
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setSdkHarnessContainerImageOverrides(".*python.*," + overrideImage);
  DataflowRunner runner = DataflowRunner.fromOptions(options);

  String dockerUrn = BeamUrns.getUrn(RunnerApi.StandardEnvironments.Environments.DOCKER);

  // Input: a pipeline with a single docker environment "env" pointing at the original image.
  RunnerApi.DockerPayload originalPayload =
      RunnerApi.DockerPayload.newBuilder().setContainerImage(originalImage).build();
  RunnerApi.Environment originalEnvironment =
      RunnerApi.Environment.newBuilder()
          .setUrn(dockerUrn)
          .setPayload(originalPayload.toByteString())
          .build();
  RunnerApi.Components originalComponents =
      RunnerApi.Components.newBuilder().putEnvironments("env", originalEnvironment).build();
  RunnerApi.Pipeline pipeline =
      RunnerApi.Pipeline.newBuilder().setComponents(originalComponents).build();

  // Expected: the same pipeline, except that "env" now points at the override image.
  RunnerApi.DockerPayload overridePayload =
      RunnerApi.DockerPayload.newBuilder().setContainerImage(overrideImage).build();
  RunnerApi.Environment overrideEnvironment =
      RunnerApi.Environment.newBuilder()
          .setUrn(dockerUrn)
          .setPayload(overridePayload.toByteString())
          .build();
  RunnerApi.Components overrideComponents =
      RunnerApi.Components.newBuilder().putEnvironments("env", overrideEnvironment).build();
  RunnerApi.Pipeline expectedPipeline =
      RunnerApi.Pipeline.newBuilder().setComponents(overrideComponents).build();

  RunnerApi.Pipeline actualPipeline = runner.applySdkEnvironmentOverrides(pipeline, options);
  assertThat(actualPipeline, equalTo(expectedPipeline));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
the class DataflowPipelineTranslatorTest method testNamesOverridden.
/**
 * Verifies that translation replaces the name of a collection (here, the single output of a
 * Create) with the name the Dataflow service expects.
 */
@Test
public void testNamesOverridden() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  // Give the Create output an explicit name; the translated job must not keep it.
  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply("Jazzy", Create.of(3)).setName("foobizzle");
  runner.replaceV1Transforms(pipeline);

  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();

  // The first translated step is the Create.
  Step createStep = job.getSteps().get(0);
  Map<String, Object> createProperties = createStep.getProperties();

  // Name recorded on the step's first output; the translator must have overridden the
  // user-supplied one.
  String outputUserName =
      getString(
          Structs.getListOfMaps(createProperties, PropertyNames.OUTPUT_INFO, null).get(0),
          PropertyNames.USER_NAME);

  // Name the output is required to carry: the step's own user name plus ".out0".
  String requiredName = getString(createProperties, PropertyNames.USER_NAME) + ".out0";

  assertThat(outputUserName, equalTo(requiredName));
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
the class DataflowPipelineTranslatorTest method testScalingAlgorithmMissing.
@Test
public void testScalingAlgorithmMissing() throws IOException {
  // This test does not configure an autoscaling algorithm on the options itself.
  DataflowPipelineOptions options = buildPipelineOptions();

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());

  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);

  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());

  // The autoscaling settings object is always present, even though no algorithm is recorded.
  assertNull(
      job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getAlgorithm());
  assertEquals(
      0,
      job.getEnvironment()
          .getWorkerPools()
          .get(0)
          .getAutoscalingSettings()
          .getMaxNumWorkers()
          .intValue());
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
the class DataflowPipelineTranslatorTest method testScalingAlgorithmNone.
@Test
public void testScalingAlgorithmNone() throws IOException {
  // Explicitly disable autoscaling and request a fixed worker count.
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setAutoscalingAlgorithm(
      DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType.NONE);
  options.setNumWorkers(42);

  Pipeline pipeline = buildPipeline(options);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());

  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);

  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();

  assertEquals(1, job.getEnvironment().getWorkerPools().size());

  // The single worker pool must carry the NONE algorithm and the requested worker count.
  assertEquals(
      "AUTOSCALING_ALGORITHM_NONE",
      job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getAlgorithm());
  assertEquals(42, job.getEnvironment().getWorkerPools().get(0).getNumWorkers().intValue());
  assertEquals(
      0,
      job.getEnvironment()
          .getWorkerPools()
          .get(0)
          .getAutoscalingSettings()
          .getMaxNumWorkers()
          .intValue());
}
use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
the class DataflowPipelineTranslatorTest method testToIterableTranslationWithIsmSideInput.
@Test
public void testToIterableTranslationWithIsmSideInput() throws Exception {
  // "Change detector" test: pins down how obtaining a PCollectionView<Iterable<T>> is
  // translated, so that a refactor cannot alter it unnoticed.
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3)).apply(View.asIterable());

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceV1Transforms(pipeline);

  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(3, steps.size());
  int stepCount = steps.size();

  // The second-to-last step must have exactly one output, flagged as using the indexed format.
  @SuppressWarnings("unchecked")
  List<Map<String, Object>> toIsmRecordOutputs =
      (List<Map<String, Object>>)
          steps.get(stepCount - 2).getProperties().get(PropertyNames.OUTPUT_INFO);
  Map<String, Object> onlyOutput = Iterables.getOnlyElement(toIsmRecordOutputs);
  assertTrue(Structs.getBoolean(onlyOutput, "use_indexed_format"));

  // The final step must be the CollectionToSingleton step.
  Step collectionToSingletonStep = steps.get(stepCount - 1);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());
}
Aggregations