Search in sources :

Example 46 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testTemplateRunnerLoggedErrorForFile.

/**
   * Verifies that running a pipeline on the {@link DataflowRunner} with a {@code --templateLocation}
   * that cannot be written fails with a {@link RuntimeException} whose message mentions the
   * output file.
   */
@Test
public void testTemplateRunnerLoggedErrorForFile() throws Exception {
    // Register the expected failure first; it is only triggered by run() at the end.
    thrown.expect(RuntimeException.class);
    thrown.expectMessage("Cannot create output file at");

    String tempLocation = tmpFolder.getRoot().getPath();

    DataflowPipelineOptions templateOptions = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    templateOptions.setRunner(DataflowRunner.class);
    templateOptions.setProject("test-project");
    templateOptions.setJobName("TestJobName");
    templateOptions.setTempLocation(tempLocation);
    templateOptions.setGcpCredential(new TestCredential());
    templateOptions.setPathValidatorClass(NoopPathValidator.class);
    // The template location is the deliberately bad path this test is about.
    templateOptions.setTemplateLocation("//bad/path");

    Pipeline templatePipeline = Pipeline.create(templateOptions);
    templatePipeline.run();
}
Also used : TestCredential(org.apache.beam.sdk.extensions.gcp.auth.TestCredential) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 47 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testUnconsumedReads.

/**
   * Applies a {@link TextIO} read whose output is never consumed, lets the {@link DataflowRunner}
   * replace transforms, and then checks that a traversal of the pipeline visits at least one
   * primitive transform.
   */
@Test
public void testUnconsumedReads() throws IOException {
    DataflowPipelineOptions dataflowOptions = buildPipelineOptions();
    RuntimeTestOptions runtimeOptions = dataflowOptions.as(RuntimeTestOptions.class);
    Pipeline pipeline = buildDataflowPipeline(dataflowOptions);

    // The read result is intentionally left without any downstream consumer.
    PCollection<String> unconsumed = pipeline.apply(TextIO.read().from(runtimeOptions.getInput()));

    DataflowRunner runner = DataflowRunner.fromOptions(dataflowOptions);
    runner.replaceTransforms(pipeline);

    final AtomicBoolean unconsumedSeenAsInput = new AtomicBoolean();
    // NOTE(review): the visitor flags ANY primitive transform; it does not inspect the node's
    // inputs for `unconsumed`, so this only proves a primitive transform exists - confirm intent.
    PipelineVisitor.Defaults primitiveVisitor = new PipelineVisitor.Defaults() {

        @Override
        public void visitPrimitiveTransform(Node node) {
            unconsumedSeenAsInput.set(true);
        }
    };
    pipeline.traverseTopologically(primitiveVisitor);

    boolean sawPrimitive = unconsumedSeenAsInput.get();
    assertThat(sawPrimitive, is(true));
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) PipelineVisitor(org.apache.beam.sdk.Pipeline.PipelineVisitor) Matchers.containsString(org.hamcrest.Matchers.containsString) Matchers.anyString(org.mockito.Matchers.anyString) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 48 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testInvalidGcpTempLocation.

/**
   * Verifies that constructing a {@link DataflowRunner} is rejected with an
   * {@link IllegalArgumentException} when the GCP temp location is not a {@code gs://} path.
   */
@Test
public void testInvalidGcpTempLocation() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    // A file:// location is used here as the invalid (non-GCS) temp location.
    options.setGcpTempLocation("file://temp/location");
    thrown.expect(IllegalArgumentException.class);
    thrown.expectMessage(containsString("Expected a valid 'gs://' path but was given"));
    // Expected to throw, so this must be the last statement: anything placed after it would be
    // unreachable whenever the test passes. (The former job-capture/verify code was dead.)
    DataflowRunner.fromOptions(options);
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Job(com.google.api.services.dataflow.model.Job) DataflowRunner.getContainerImageForJob(org.apache.beam.runners.dataflow.DataflowRunner.getContainerImageForJob) Test(org.junit.Test)

Example 49 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testProjectId.

/**
   * Smoke test: builds a {@link DataflowRunner} from options that use the project id
   * {@code "foo-12345"}. There is no explicit assertion; the test passes if
   * {@code fromOptions} completes without throwing.
   */
@Test
public void testProjectId() throws IOException {
    DataflowPipelineOptions projectOptions = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    projectOptions.setProject("foo-12345");
    projectOptions.setRunner(DataflowRunner.class);
    projectOptions.setGcpCredential(new TestCredential());
    projectOptions.setGcsUtil(mockGcsUtil);
    projectOptions.setGcpTempLocation(VALID_TEMP_BUCKET);

    // Construction is the whole check; the returned runner is not used.
    DataflowRunner.fromOptions(projectOptions);
}
Also used : TestCredential(org.apache.beam.sdk.extensions.gcp.auth.TestCredential) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Test(org.junit.Test)

Example 50 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testNonExistentProfileLocation.

/**
   * Verifies that constructing a {@link DataflowRunner} is rejected with an
   * {@link IllegalArgumentException} when {@code saveProfilesToGcs} is set to
   * {@code NON_EXISTENT_BUCKET}.
   */
@Test
public void testNonExistentProfileLocation() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setSaveProfilesToGcs(NON_EXISTENT_BUCKET);
    thrown.expect(IllegalArgumentException.class);
    thrown.expectMessage(containsString("Output path does not exist or is not writeable: " + NON_EXISTENT_BUCKET));
    // Expected to throw, so this must be the last statement: anything placed after it would be
    // unreachable whenever the test passes. (The former job-capture/verify code was dead.)
    DataflowRunner.fromOptions(options);
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Job(com.google.api.services.dataflow.model.Job) DataflowRunner.getContainerImageForJob(org.apache.beam.runners.dataflow.DataflowRunner.getContainerImageForJob) Test(org.junit.Test)

Aggregations

DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions): 75; Test (org.junit.Test): 66; Pipeline (org.apache.beam.sdk.Pipeline): 39; Job (com.google.api.services.dataflow.model.Job): 31; DataflowPackage (com.google.api.services.dataflow.model.DataflowPackage): 22; Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString): 14; TestCredential (org.apache.beam.sdk.extensions.gcp.auth.TestCredential): 14; TestPipeline (org.apache.beam.sdk.testing.TestPipeline): 14; DataflowRunner.getContainerImageForJob (org.apache.beam.runners.dataflow.DataflowRunner.getContainerImageForJob): 10; Step (com.google.api.services.dataflow.model.Step): 8; Matchers.containsString (org.hamcrest.Matchers.containsString): 7; Matchers.anyString (org.mockito.Matchers.anyString): 7; ImmutableMap (com.google.common.collect.ImmutableMap): 6; Map (java.util.Map): 6; Structs.addObject (org.apache.beam.runners.dataflow.util.Structs.addObject): 6; ImmutableList (com.google.common.collect.ImmutableList): 4; ArrayList (java.util.ArrayList): 4; LinkedList (java.util.LinkedList): 4; List (java.util.List): 4; TupleTagList (org.apache.beam.sdk.values.TupleTagList): 4