use of com.google.api.services.dataflow.model.DataflowPackage in project beam by apache.
the class DataflowPipelineTranslatorTest method createPredefinedStep.
/**
 * Builds a read / {@link DoFn} / write pipeline, translates it into a Dataflow {@link Job} and
 * returns the translated {@link Step} found at index 1, after checking that it carries the user
 * name the {@link DoFn} was applied under.
 */
private static Step createPredefinedStep() throws Exception {
  DataflowPipelineOptions pipelineOptions = buildPipelineOptions();
  DataflowPipelineTranslator pipelineTranslator =
      DataflowPipelineTranslator.fromOptions(pipelineOptions);

  // Source pipeline: text read -> named no-op ParDo -> text write.
  Pipeline sourcePipeline = Pipeline.create(pipelineOptions);
  String doFnName = "DoFn1";
  sourcePipeline
      .apply("ReadMyFile", TextIO.read().from("gs://bucket/in"))
      .apply(doFnName, ParDo.of(new NoOpFn()))
      .apply("WriteMyFile", TextIO.write().to("gs://bucket/out"));

  // Let the runner substitute its own transforms before the pipeline is translated.
  DataflowRunner dataflowRunner = DataflowRunner.fromOptions(pipelineOptions);
  dataflowRunner.replaceTransforms(sourcePipeline);
  Job translatedJob =
      pipelineTranslator
          .translate(sourcePipeline, dataflowRunner, Collections.<DataflowPackage>emptyList())
          .getJob();

  List<Step> translatedSteps = translatedJob.getSteps();
  assertEquals(8, translatedSteps.size());

  // The step at index 1 is expected to be the one produced for the named ParDo.
  Step doFnStep = translatedSteps.get(1);
  assertEquals(doFnName, getString(doFnStep.getProperties(), PropertyNames.USER_NAME));
  assertAllStepOutputsHaveUniqueIds(translatedJob);
  return doFnStep;
}
use of com.google.api.services.dataflow.model.DataflowPackage in project beam by apache.
the class DataflowPipelineTranslatorTest method testSubnetworkConfig.
// Checks that a subnetwork set on the pipeline options is copied onto the job's single worker pool.
@Test
public void testSubnetworkConfig() throws IOException {
  final String expectedSubnetwork = "regions/REGION/subnetworks/SUBNETWORK";

  DataflowPipelineOptions pipelineOptions = buildPipelineOptions();
  pipelineOptions.setSubnetwork(expectedSubnetwork);

  Pipeline pipeline = buildPipeline(pipelineOptions);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());

  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(pipelineOptions);
  DataflowRunner runner = DataflowRunner.fromOptions(pipelineOptions);
  Job translatedJob =
      translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();

  assertEquals(1, translatedJob.getEnvironment().getWorkerPools().size());
  assertEquals(
      expectedSubnetwork,
      translatedJob.getEnvironment().getWorkerPools().get(0).getSubnetwork());
}
use of com.google.api.services.dataflow.model.DataflowPackage in project beam by apache.
the class DataflowPipelineTranslatorTest method testSettingOfPipelineOptionsWithCustomUserType.
// Checks that an option holding a custom user type shows up in the job's SDK pipeline options
// under the "jacksonIncompatible" key with the expected string value.
@Test
public void testSettingOfPipelineOptionsWithCustomUserType() throws IOException {
  DataflowPipelineOptions pipelineOptions = buildPipelineOptions();
  pipelineOptions.setRunner(DataflowRunner.class);
  JacksonIncompatible customValue = new JacksonIncompatible("userCustomTypeTest");
  pipelineOptions.as(JacksonIncompatibleOptions.class).setJacksonIncompatible(customValue);

  Pipeline pipeline = Pipeline.create(pipelineOptions);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());

  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(pipelineOptions);
  DataflowRunner runner = DataflowRunner.fromOptions(pipelineOptions);
  Job translatedJob =
      translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();

  Map<String, Object> sdkOptions = translatedJob.getEnvironment().getSdkPipelineOptions();
  assertThat(sdkOptions, hasKey("options"));

  Map<String, Object> serializedOptions = (Map<String, Object>) sdkOptions.get("options");
  assertThat(serializedOptions, hasEntry("jacksonIncompatible", (Object) "userCustomTypeTest"));
}
use of com.google.api.services.dataflow.model.DataflowPackage in project beam by apache.
the class DataflowPipelineTranslatorTest method testPredefinedAddStep.
// Embeds a step translated from one pipeline into a second pipeline and checks that it is wired
// to its neighbours and otherwise left unchanged.
@Test
public void testPredefinedAddStep() throws Exception {
  DataflowPipelineOptions pipelineOptions = buildPipelineOptions();
  DataflowPipelineTranslator pipelineTranslator =
      DataflowPipelineTranslator.fromOptions(pipelineOptions);
  DataflowPipelineTranslator.registerTransformTranslator(
      EmbeddedTransform.class, new EmbeddedTranslator());

  // The step to embed comes from a separate, already translated pipeline.
  Step originalStep = createPredefinedStep();

  // Host pipeline: read -> ParDo -> embedded step -> ParDo.
  Pipeline hostPipeline = Pipeline.create(pipelineOptions);
  hostPipeline
      .apply("ReadMyFile", TextIO.read().from("gs://bucket/in"))
      .apply(ParDo.of(new NoOpFn()))
      .apply(new EmbeddedTransform(originalStep.clone()))
      .apply(ParDo.of(new NoOpFn()));

  DataflowRunner dataflowRunner = DataflowRunner.fromOptions(pipelineOptions);
  dataflowRunner.replaceTransforms(hostPipeline);
  Job translatedJob =
      pipelineTranslator
          .translate(hostPipeline, dataflowRunner, Collections.<DataflowPackage>emptyList())
          .getJob();
  assertAllStepOutputsHaveUniqueIds(translatedJob);

  List<Step> translatedSteps = translatedJob.getSteps();
  assertEquals(4, translatedSteps.size());
  Step stepBefore = translatedSteps.get(1);
  Step embeddedStep = translatedSteps.get(2);
  Step stepAfter = translatedSteps.get(3);

  // The embedded step must consume exactly what the preceding step produces.
  Map<String, Object> outputOfStepBefore = getOutputPortReference(stepBefore);
  Map<String, Object> inputOfEmbedded =
      getDictionary(embeddedStep.getProperties(), PropertyNames.PARALLEL_INPUT);
  assertEquals(outputOfStepBefore, inputOfEmbedded);

  // The following step must consume exactly what the embedded step produces.
  Map<String, Object> outputOfEmbedded = getOutputPortReference(embeddedStep);
  Map<String, Object> inputOfStepAfter =
      getDictionary(stepAfter.getProperties(), PropertyNames.PARALLEL_INPUT);
  assertEquals(outputOfEmbedded, inputOfStepAfter);

  // Apart from the remapped input, the embedded step must equal the predefined one; compare
  // clones with the input property stripped so the originals stay untouched.
  Step expectedWithoutInput = originalStep.clone();
  Step actualWithoutInput = embeddedStep.clone();
  expectedWithoutInput.getProperties().remove(PropertyNames.PARALLEL_INPUT);
  actualWithoutInput.getProperties().remove(PropertyNames.PARALLEL_INPUT);
  assertEquals(expectedWithoutInput, actualWithoutInput);
}
use of com.google.api.services.dataflow.model.DataflowPackage in project beam by apache.
the class DataflowPipelineTranslatorTest method testSettingOfSdkPipelineOptions.
// Checks that the translated job's SDK pipeline options contain the expected entries under the
// "options" key.
@Test
public void testSettingOfSdkPipelineOptions() throws IOException {
  DataflowPipelineOptions pipelineOptions = buildPipelineOptions();
  pipelineOptions.setRunner(DataflowRunner.class);

  Pipeline pipeline = Pipeline.create(pipelineOptions);
  pipeline.traverseTopologically(new RecordingPipelineVisitor());

  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(pipelineOptions);
  DataflowRunner runner = DataflowRunner.fromOptions(pipelineOptions);
  Job translatedJob =
      translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();

  Map<String, Object> sdkOptions = translatedJob.getEnvironment().getSdkPipelineOptions();
  assertThat(sdkOptions, hasKey("options"));

  Map<String, Object> serializedOptions = (Map<String, Object>) sdkOptions.get("options");
  assertThat(
      serializedOptions, hasEntry("appName", (Object) "DataflowPipelineTranslatorTest"));
  assertThat(serializedOptions, hasEntry("project", (Object) "some-project"));
  assertThat(
      serializedOptions,
      hasEntry("pathValidatorClass", (Object) GcsPathValidator.class.getName()));
  assertThat(serializedOptions, hasEntry("runner", (Object) DataflowRunner.class.getName()));
  assertThat(serializedOptions, hasEntry("jobName", (Object) "some-job-name"));
  assertThat(
      serializedOptions, hasEntry("tempLocation", (Object) "gs://somebucket/some/path"));
  assertThat(
      serializedOptions,
      hasEntry("stagingLocation", (Object) "gs://somebucket/some/path/staging/"));
  assertThat(serializedOptions, hasEntry("stableUniqueNames", (Object) "WARNING"));
  assertThat(serializedOptions, hasEntry("streaming", (Object) false));
  assertThat(serializedOptions, hasEntry("numberOfWorkerHarnessThreads", (Object) 0));
}
Aggregations