Search in sources :

Example 1 with Structs.getString

use of org.apache.beam.runners.dataflow.util.Structs.getString in project beam by apache.

the class DataflowPipelineTranslatorTest method testTaggedNamesOverridden.

/**
 * Checks the collection names produced when translating a multi-output ParDo. This case is
 * special because the user can name both the tags and the resulting collections; translation
 * must override all of those with the names the Dataflow service expects.
 */
@Test
public void testTaggedNamesOverridden() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    options.setStreaming(false);
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    Pipeline pipeline = Pipeline.create(options);

    // Two tags with user-chosen ids and one tag without an explicit id.
    TupleTag<Integer> mainTag = new TupleTag<Integer>("frazzle") {
    };
    TupleTag<Integer> namedAdditionalTag = new TupleTag<Integer>("bazzle") {
    };
    TupleTag<Integer> unnamedAdditionalTag = new TupleTag<Integer>() {
    };

    // A DoFn that emits nothing; only the shape of the translated step matters here.
    DoFn<Integer, Integer> noOpFn = new DoFn<Integer, Integer>() {

        @ProcessElement
        public void drop() {
        }
    };
    TupleTagList additionalTags = TupleTagList.of(namedAdditionalTag).and(unnamedAdditionalTag);
    PCollectionTuple taggedOutputs = pipeline.apply(Create.of(3)).apply(ParDo.of(noOpFn).withOutputTags(mainTag, additionalTags));

    // Give every output collection a user-specified name that translation is expected to discard.
    taggedOutputs.get(mainTag).setName("bizbazzle");
    taggedOutputs.get(namedAdditionalTag).setName("gonzaggle");
    taggedOutputs.get(unnamedAdditionalTag).setName("froonazzle");

    runner.replaceTransforms(pipeline);
    Job translatedJob = translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();

    // The ParDo step
    Step parDoStep = translatedJob.getSteps().get(1);
    String stepName = Structs.getString(parDoStep.getProperties(), PropertyNames.USER_NAME);
    List<Map<String, Object>> outputInfos = Structs.getListOfMaps(parDoStep.getProperties(), PropertyNames.OUTPUT_INFO, null);
    assertThat(outputInfos.size(), equalTo(3));

    // The names set by the user _and_ the tags _must_ be ignored, or metrics will not show up.
    int outputIndex = 0;
    for (Map<String, Object> outputInfo : outputInfos) {
        String actualName = Structs.getString(outputInfo, PropertyNames.USER_NAME);
        assertThat(actualName, equalTo(String.format("%s.out%s", stepName, outputIndex)));
        outputIndex++;
    }
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) TupleTag(org.apache.beam.sdk.values.TupleTag) Step(com.google.api.services.dataflow.model.Step) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) Pipeline(org.apache.beam.sdk.Pipeline) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Job(com.google.api.services.dataflow.model.Job) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) DataflowPackage(com.google.api.services.dataflow.model.DataflowPackage) Test(org.junit.Test)

Example 2 with Structs.getString

use of org.apache.beam.runners.dataflow.util.Structs.getString in project beam by apache.

the class DataflowPipelineTranslatorTest method assertAllStepOutputsHaveUniqueIds.

/**
 * Asserts that no output id is used more than once across all steps of {@code job}.
 *
 * <p>Every entry of each step's {@code OUTPUT_INFO} property is inspected; its {@code OUTPUT_NAME}
 * is parsed as a {@code long} id. Steps without an {@code OUTPUT_INFO} property are skipped.
 *
 * <p>Duplicates are found by recording each id in a set as it is encountered. Removing the set of
 * distinct ids from the full list (via {@code List.removeAll}) would drop every occurrence of
 * every id and always leave an empty list, so such a check could never fail.
 *
 * @param job the translated job whose step outputs are checked
 * @throws NumberFormatException if an output name is not a parseable {@code long}
 */
private static void assertAllStepOutputsHaveUniqueIds(Job job) throws Exception {
    Set<Long> seenOutputIds = new HashSet<>();
    List<Long> duplicateOutputIds = new ArrayList<>();
    for (Step step : job.getSteps()) {
        List<Map<String, Object>> outputInfoList = (List<Map<String, Object>>) step.getProperties().get(PropertyNames.OUTPUT_INFO);
        if (outputInfoList != null) {
            for (Map<String, Object> outputInfo : outputInfoList) {
                Long outputId = Long.valueOf(Structs.getString(outputInfo, PropertyNames.OUTPUT_NAME));
                // Set.add returns false when the id was already present, i.e. this is a repeat.
                if (!seenOutputIds.add(outputId)) {
                    duplicateOutputIds.add(outputId);
                }
            }
        }
    }
    assertTrue(String.format("Found duplicate output ids %s", duplicateOutputIds), duplicateOutputIds.isEmpty());
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) TupleTagList(org.apache.beam.sdk.values.TupleTagList) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) Structs.addObject(org.apache.beam.runners.dataflow.util.Structs.addObject) Step(com.google.api.services.dataflow.model.Step) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashSet(java.util.HashSet)

Example 3 with Structs.getString

use of org.apache.beam.runners.dataflow.util.Structs.getString in project beam by apache.

the class DataflowPipelineTranslatorTest method testNamesOverridden.

/**
 * Checks that translation overrides the name of a collection (here, the single output of a
 * Create) so that it is what the Dataflow service expects.
 */
@Test
public void testNamesOverridden() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    options.setStreaming(false);
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    Pipeline pipeline = Pipeline.create(options);

    // Name both the transform and its output collection explicitly.
    pipeline.apply("Jazzy", Create.of(3)).setName("foobizzle");

    runner.replaceTransforms(pipeline);
    Job translatedJob = translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();

    // The Create step
    Step createStep = translatedJob.getSteps().get(0);
    Map<String, Object> stepProperties = createStep.getProperties();

    // This is the name that is "set by the user" that the Dataflow translator must override
    List<Map<String, Object>> outputInfos = Structs.getListOfMaps(stepProperties, PropertyNames.OUTPUT_INFO, null);
    String reportedOutputName = Structs.getString(outputInfos.get(0), PropertyNames.USER_NAME);

    // This is the calculated name that must actually be used
    String stepUserName = Structs.getString(stepProperties, PropertyNames.USER_NAME);
    String expectedOutputName = stepUserName + ".out0";

    assertThat(reportedOutputName, equalTo(expectedOutputName));
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Step(com.google.api.services.dataflow.model.Step) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) Job(com.google.api.services.dataflow.model.Job) DataflowPackage(com.google.api.services.dataflow.model.DataflowPackage) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

Step (com.google.api.services.dataflow.model.Step)3 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)3 DataflowPackage (com.google.api.services.dataflow.model.DataflowPackage)2 Job (com.google.api.services.dataflow.model.Job)2 ImmutableMap (com.google.common.collect.ImmutableMap)2 Map (java.util.Map)2 DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)2 Pipeline (org.apache.beam.sdk.Pipeline)2 Test (org.junit.Test)2 ImmutableList (com.google.common.collect.ImmutableList)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 LinkedList (java.util.LinkedList)1 List (java.util.List)1 Structs.addObject (org.apache.beam.runners.dataflow.util.Structs.addObject)1 PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)1 TupleTag (org.apache.beam.sdk.values.TupleTag)1 TupleTagList (org.apache.beam.sdk.values.TupleTagList)1