Search in sources :

Example 86 with Pipeline

use of org.apache.beam.sdk.Pipeline in project beam by apache.

the class DataflowPipelineTranslatorTest method testScalingAlgorithmMissing.

@Test
public void testScalingAlgorithmMissing() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    Pipeline p = buildPipeline(options);
    p.traverseTopologically(new RecordingPipelineVisitor());
    Job job = DataflowPipelineTranslator.fromOptions(options).translate(p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList()).getJob();
    assertEquals(1, job.getEnvironment().getWorkerPools().size());
    // Autoscaling settings are always set.
    assertNull(job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getAlgorithm());
    assertEquals(0, job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getMaxNumWorkers().intValue());
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Job(com.google.api.services.dataflow.model.Job) DataflowPackage(com.google.api.services.dataflow.model.DataflowPackage) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 87 with Pipeline

use of org.apache.beam.sdk.Pipeline in project beam by apache.

the class DataflowPipelineTranslatorTest method testStepDisplayData.

@Test
public void testStepDisplayData() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    Pipeline pipeline = Pipeline.create(options);
    DoFn<Integer, Integer> fn1 = new DoFn<Integer, Integer>() {

        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
            c.output(c.element());
        }

        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
            builder.add(DisplayData.item("foo", "bar")).add(DisplayData.item("foo2", DataflowPipelineTranslatorTest.class).withLabel("Test Class").withLinkUrl("http://www.google.com"));
        }
    };
    DoFn<Integer, Integer> fn2 = new DoFn<Integer, Integer>() {

        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
            c.output(c.element());
        }

        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
            builder.add(DisplayData.item("foo3", 1234));
        }
    };
    ParDo.SingleOutput<Integer, Integer> parDo1 = ParDo.of(fn1);
    ParDo.SingleOutput<Integer, Integer> parDo2 = ParDo.of(fn2);
    pipeline.apply(Create.of(1, 2, 3)).apply(parDo1).apply(parDo2);
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    runner.replaceTransforms(pipeline);
    Job job = translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();
    assertAllStepOutputsHaveUniqueIds(job);
    List<Step> steps = job.getSteps();
    assertEquals(3, steps.size());
    Map<String, Object> parDo1Properties = steps.get(1).getProperties();
    Map<String, Object> parDo2Properties = steps.get(2).getProperties();
    assertThat(parDo1Properties, hasKey("display_data"));
    @SuppressWarnings("unchecked") Collection<Map<String, String>> fn1displayData = (Collection<Map<String, String>>) parDo1Properties.get("display_data");
    @SuppressWarnings("unchecked") Collection<Map<String, String>> fn2displayData = (Collection<Map<String, String>>) parDo2Properties.get("display_data");
    ImmutableSet<ImmutableMap<String, Object>> expectedFn1DisplayData = ImmutableSet.of(ImmutableMap.<String, Object>builder().put("key", "foo").put("type", "STRING").put("value", "bar").put("namespace", fn1.getClass().getName()).build(), ImmutableMap.<String, Object>builder().put("key", "fn").put("label", "Transform Function").put("type", "JAVA_CLASS").put("value", fn1.getClass().getName()).put("shortValue", fn1.getClass().getSimpleName()).put("namespace", parDo1.getClass().getName()).build(), ImmutableMap.<String, Object>builder().put("key", "foo2").put("type", "JAVA_CLASS").put("value", DataflowPipelineTranslatorTest.class.getName()).put("shortValue", DataflowPipelineTranslatorTest.class.getSimpleName()).put("namespace", fn1.getClass().getName()).put("label", "Test Class").put("linkUrl", "http://www.google.com").build());
    ImmutableSet<ImmutableMap<String, Object>> expectedFn2DisplayData = ImmutableSet.of(ImmutableMap.<String, Object>builder().put("key", "fn").put("label", "Transform Function").put("type", "JAVA_CLASS").put("value", fn2.getClass().getName()).put("shortValue", fn2.getClass().getSimpleName()).put("namespace", parDo2.getClass().getName()).build(), ImmutableMap.<String, Object>builder().put("key", "foo3").put("type", "INTEGER").put("value", 1234L).put("namespace", fn2.getClass().getName()).build());
    assertEquals(expectedFn1DisplayData, ImmutableSet.copyOf(fn1displayData));
    assertEquals(expectedFn2DisplayData, ImmutableSet.copyOf(fn2displayData));
}
Also used : Step(com.google.api.services.dataflow.model.Step) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) Job(com.google.api.services.dataflow.model.Job) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) ImmutableMap(com.google.common.collect.ImmutableMap) Pipeline(org.apache.beam.sdk.Pipeline) DoFn(org.apache.beam.sdk.transforms.DoFn) ParDo(org.apache.beam.sdk.transforms.ParDo) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) Structs.addObject(org.apache.beam.runners.dataflow.util.Structs.addObject) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) DataflowPackage(com.google.api.services.dataflow.model.DataflowPackage) Test(org.junit.Test)

Example 88 with Pipeline

use of org.apache.beam.sdk.Pipeline in project beam by apache.

the class DataflowPipelineTranslatorTest method testToSingletonTranslationWithIsmSideInput.

@Test
public void testToSingletonTranslationWithIsmSideInput() throws Exception {
    // A "change detector" test that makes sure the translation
    // of getting a PCollectionView<T> does not change
    // in bad ways during refactor
    DataflowPipelineOptions options = buildPipelineOptions();
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    Pipeline pipeline = Pipeline.create(options);
    pipeline.apply(Create.of(1)).apply(View.<Integer>asSingleton());
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    runner.replaceTransforms(pipeline);
    Job job = translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();
    assertAllStepOutputsHaveUniqueIds(job);
    List<Step> steps = job.getSteps();
    assertEquals(5, steps.size());
    @SuppressWarnings("unchecked") List<Map<String, Object>> toIsmRecordOutputs = (List<Map<String, Object>>) steps.get(3).getProperties().get(PropertyNames.OUTPUT_INFO);
    assertTrue(Structs.getBoolean(Iterables.getOnlyElement(toIsmRecordOutputs), "use_indexed_format"));
    Step collectionToSingletonStep = steps.get(4);
    assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Step(com.google.api.services.dataflow.model.Step) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) Pipeline(org.apache.beam.sdk.Pipeline) List(java.util.List) TupleTagList(org.apache.beam.sdk.values.TupleTagList) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) Structs.addObject(org.apache.beam.runners.dataflow.util.Structs.addObject) Job(com.google.api.services.dataflow.model.Job) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) DataflowPackage(com.google.api.services.dataflow.model.DataflowPackage) Test(org.junit.Test)

Example 89 with Pipeline

use of org.apache.beam.sdk.Pipeline in project beam by apache.

the class DataflowPipelineTranslatorTest method createPredefinedStep.

/**
   * Returns a Step for a {@link DoFn} by creating and translating a pipeline.
   */
private static Step createPredefinedStep() throws Exception {
    DataflowPipelineOptions options = buildPipelineOptions();
    DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
    Pipeline pipeline = Pipeline.create(options);
    String stepName = "DoFn1";
    pipeline.apply("ReadMyFile", TextIO.read().from("gs://bucket/in")).apply(stepName, ParDo.of(new NoOpFn())).apply("WriteMyFile", TextIO.write().to("gs://bucket/out"));
    DataflowRunner runner = DataflowRunner.fromOptions(options);
    runner.replaceTransforms(pipeline);
    Job job = translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();
    assertEquals(8, job.getSteps().size());
    Step step = job.getSteps().get(1);
    assertEquals(stepName, getString(step.getProperties(), PropertyNames.USER_NAME));
    assertAllStepOutputsHaveUniqueIds(job);
    return step;
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) Step(com.google.api.services.dataflow.model.Step) Job(com.google.api.services.dataflow.model.Job) DataflowPackage(com.google.api.services.dataflow.model.DataflowPackage) Pipeline(org.apache.beam.sdk.Pipeline)

Example 90 with Pipeline

use of org.apache.beam.sdk.Pipeline in project beam by apache.

the class DataflowPipelineTranslatorTest method testSubnetworkConfig.

@Test
public void testSubnetworkConfig() throws IOException {
    final String testSubnetwork = "regions/REGION/subnetworks/SUBNETWORK";
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setSubnetwork(testSubnetwork);
    Pipeline p = buildPipeline(options);
    p.traverseTopologically(new RecordingPipelineVisitor());
    Job job = DataflowPipelineTranslator.fromOptions(options).translate(p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList()).getJob();
    assertEquals(1, job.getEnvironment().getWorkerPools().size());
    assertEquals(testSubnetwork, job.getEnvironment().getWorkerPools().get(0).getSubnetwork());
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) Job(com.google.api.services.dataflow.model.Job) DataflowPackage(com.google.api.services.dataflow.model.DataflowPackage) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

Pipeline (org.apache.beam.sdk.Pipeline)184 Test (org.junit.Test)123 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)86 DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)39 KV (org.apache.beam.sdk.values.KV)35 Job (com.google.api.services.dataflow.model.Job)26 DoFn (org.apache.beam.sdk.transforms.DoFn)24 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)22 DataflowPackage (com.google.api.services.dataflow.model.DataflowPackage)21 TableRow (com.google.api.services.bigquery.model.TableRow)16 PipelineResult (org.apache.beam.sdk.PipelineResult)14 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)13 TableSchema (com.google.api.services.bigquery.model.TableSchema)12 ApexPipelineOptions (org.apache.beam.runners.apex.ApexPipelineOptions)12 Map (java.util.Map)11 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)10 ArrayList (java.util.ArrayList)10 Instant (org.joda.time.Instant)10 TableReference (com.google.api.services.bigquery.model.TableReference)9 JsonSchemaToTableSchema (org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema)9