use of com.google.api.services.dataflow.model.Step in project beam by apache.
the class DataflowPipelineTranslatorTest method assertAllStepOutputsHaveUniqueIds.
/**
 * Asserts that no output id occurs more than once across all steps of {@code job}.
 *
 * <p>For every step, the {@code PropertyNames.OUTPUT_INFO} property (when present) is read as a
 * list of maps, and the {@code PropertyNames.OUTPUT_NAME} string of each entry is parsed as a
 * {@code long} id.
 *
 * @param job the job whose steps are inspected
 * @throws NumberFormatException if an output name cannot be parsed as a {@code long}
 */
private static void assertAllStepOutputsHaveUniqueIds(Job job) throws Exception {
  Set<Long> seenOutputIds = new HashSet<>();
  List<Long> duplicateOutputIds = new ArrayList<>();
  for (Step step : job.getSteps()) {
    @SuppressWarnings("unchecked")
    List<Map<String, Object>> outputInfoList =
        (List<Map<String, Object>>) step.getProperties().get(PropertyNames.OUTPUT_INFO);
    if (outputInfoList == null) {
      // Steps without any output info contribute no ids.
      continue;
    }
    for (Map<String, Object> outputInfo : outputInfoList) {
      Long outputId = Long.parseLong(Structs.getString(outputInfo, PropertyNames.OUTPUT_NAME));
      // Set.add returns false when the id was already recorded, i.e. it is a duplicate.
      // (Removing the set of all ids from the list of ids would drop every occurrence and
      // leave nothing to report, so duplicates are collected as they are encountered.)
      if (!seenOutputIds.add(outputId)) {
        duplicateOutputIds.add(outputId);
      }
    }
  }
  assertTrue(
      String.format("Found duplicate output ids %s", duplicateOutputIds),
      duplicateOutputIds.isEmpty());
}
use of com.google.api.services.dataflow.model.Step in project beam by apache.
the class DataflowPipelineTranslatorTest method testPredefinedAddStep.
/**
 * Embeds a predefined step into a pipeline and checks that translation wires its input and
 * output to the neighbouring steps while leaving the rest of the step untouched.
 */
@Test
public void testPredefinedAddStep() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  DataflowPipelineTranslator.registerTransformTranslator(
      EmbeddedTransform.class, new EmbeddedTranslator());

  // The predefined step is produced by a separate pipeline.
  Step predefinedStep = createPredefinedStep();

  // Host pipeline: read -> no-op -> embedded predefined step -> no-op.
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply("ReadMyFile", TextIO.read().from("gs://bucket/in"))
      .apply(ParDo.of(new NoOpFn()))
      .apply(new EmbeddedTransform(predefinedStep.clone()))
      .apply(ParDo.of(new NoOpFn()));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);

  Job job =
      translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(4, steps.size());
  Step stepBefore = steps.get(1);
  Step embeddedStep = steps.get(2);
  Step stepAfter = steps.get(3);

  // The embedded step must consume exactly what the preceding step produces.
  Map<String, Object> outputOfStepBefore = getOutputPortReference(stepBefore);
  Map<String, Object> inputOfEmbeddedStep =
      getDictionary(embeddedStep.getProperties(), PropertyNames.PARALLEL_INPUT);
  assertEquals(outputOfStepBefore, inputOfEmbeddedStep);

  // The following step must consume exactly what the embedded step produces.
  Map<String, Object> outputOfEmbeddedStep = getOutputPortReference(embeddedStep);
  Map<String, Object> inputOfStepAfter =
      getDictionary(stepAfter.getProperties(), PropertyNames.PARALLEL_INPUT);
  assertEquals(outputOfEmbeddedStep, inputOfStepAfter);

  // Apart from the remapped input, the embedded step must equal the predefined one; compare
  // clones with the parallel input stripped from both.
  Step expectedWithoutInput = predefinedStep.clone();
  Step actualWithoutInput = embeddedStep.clone();
  expectedWithoutInput.getProperties().remove(PropertyNames.PARALLEL_INPUT);
  actualWithoutInput.getProperties().remove(PropertyNames.PARALLEL_INPUT);
  assertEquals(expectedWithoutInput, actualWithoutInput);
}
use of com.google.api.services.dataflow.model.Step in project beam by apache.
the class DataflowPipelineTranslatorTest method testToIterableTranslationWithIsmSideInput.
/**
 * "Change detector" test: guards the translation of a {@code PCollectionView<Iterable<T>>}
 * against unintended changes during refactoring.
 */
@Test
public void testToIterableTranslationWithIsmSideInput() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply(Create.of(1, 2, 3))
      .apply(View.<Integer>asIterable());

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);

  Job job =
      translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(3, steps.size());

  // The second step is expected to have a single output flagged as using the indexed format.
  @SuppressWarnings("unchecked")
  List<Map<String, Object>> toIsmRecordOutputs =
      (List<Map<String, Object>>) steps.get(1).getProperties().get(PropertyNames.OUTPUT_INFO);
  Map<String, Object> onlyOutput = Iterables.getOnlyElement(toIsmRecordOutputs);
  assertTrue(Structs.getBoolean(onlyOutput, "use_indexed_format"));

  // The last step must be of the CollectionToSingleton kind.
  Step collectionToSingletonStep = steps.get(2);
  String actualKind = collectionToSingletonStep.getKind();
  assertEquals("CollectionToSingleton", actualKind);
}
use of com.google.api.services.dataflow.model.Step in project beam by apache.
the class DataflowPipelineTranslatorTest method testNamesOverridden.
/**
 * Verifies that translation overrides the user-assigned name of a collection (here the output
 * of a {@code Create}) with the name the Dataflow service expects.
 */
@Test
public void testNamesOverridden() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  // The runner is built from the options before streaming is switched off.
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply("Jazzy", Create.of(3)).setName("foobizzle");
  runner.replaceTransforms(pipeline);

  Job job =
      translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();

  // First step of the job: the Create.
  Step createStep = job.getSteps().get(0);

  // Name recorded on the step's first output; the translator must have replaced the
  // user-chosen collection name here.
  Map<String, Object> firstOutputInfo =
      Structs.getListOfMaps(createStep.getProperties(), PropertyNames.OUTPUT_INFO, null).get(0);
  String userSpecifiedName = Structs.getString(firstOutputInfo, PropertyNames.USER_NAME);

  // Name derived from the step itself, which is the one that must actually be used.
  String stepUserName = getString(createStep.getProperties(), PropertyNames.USER_NAME);
  String calculatedName = stepUserName + ".out0";

  assertThat(userSpecifiedName, equalTo(calculatedName));
}
Aggregations