Search in sources :

Example 61 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testGcsUploadBufferSizeIsUnsetForBatchWhenDefault.

@Test
public void testGcsUploadBufferSizeIsUnsetForBatchWhenDefault() throws IOException {
    DataflowPipelineOptions batchOptions = buildPipelineOptions();
    batchOptions.setRunner(DataflowRunner.class);
    Pipeline.create(batchOptions);
    assertNull(batchOptions.getGcsUploadBufferSizeBytes());
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Test(org.junit.Test)

Example 62 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testInvalidNumberOfWorkerHarnessThreads.

@Test
public void testInvalidNumberOfWorkerHarnessThreads() throws IOException {
    DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    FileSystems.setDefaultPipelineOptions(options);
    options.setRunner(DataflowRunner.class);
    options.setProject("foo-12345");
    options.setGcpTempLocation(VALID_TEMP_BUCKET);
    options.setGcsUtil(mockGcsUtil);
    options.as(DataflowPipelineDebugOptions.class).setNumberOfWorkerHarnessThreads(-1);
    thrown.expect(IllegalArgumentException.class);
    thrown.expectMessage("Number of worker harness threads");
    thrown.expectMessage("Please make sure the value is non-negative.");
    DataflowRunner.fromOptions(options);
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) DataflowPipelineDebugOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions) Test(org.junit.Test)

Example 63 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testRunReturnDifferentRequestId.

@Test
public void testRunReturnDifferentRequestId() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    Dataflow mockDataflowClient = options.getDataflowClient();
    Dataflow.Projects.Locations.Jobs.Create mockRequest = mock(Dataflow.Projects.Locations.Jobs.Create.class);
    when(mockDataflowClient.projects().locations().jobs().create(eq(PROJECT_ID), eq(REGION_ID), any(Job.class))).thenReturn(mockRequest);
    Job resultJob = new Job();
    resultJob.setId("newid");
    // Return a different request id.
    resultJob.setClientRequestId("different_request_id");
    when(mockRequest.execute()).thenReturn(resultJob);
    Pipeline p = buildDataflowPipeline(options);
    try {
        p.run();
        fail("Expected DataflowJobAlreadyExistsException");
    } catch (DataflowJobAlreadyExistsException expected) {
        assertThat(expected.getMessage(), containsString("If you want to submit a second job, try again by setting a " + "different name using --jobName."));
        assertEquals(expected.getJob().getJobId(), resultJob.getId());
    }
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Job(com.google.api.services.dataflow.model.Job) DataflowRunner.getContainerImageForJob(org.apache.beam.runners.dataflow.DataflowRunner.getContainerImageForJob) Dataflow(com.google.api.services.dataflow.Dataflow) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 64 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testWorkerHarnessContainerImage.

@Test
public void testWorkerHarnessContainerImage() {
    DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    // default image set
    options.setWorkerHarnessContainerImage("some-container");
    assertThat(getContainerImageForJob(options), equalTo("some-container"));
    // batch, legacy
    options.setWorkerHarnessContainerImage("gcr.io/IMAGE/foo");
    options.setExperiments(null);
    options.setStreaming(false);
    assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java-batch/foo"));
    // streaming, legacy
    options.setStreaming(true);
    assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java-streaming/foo"));
    // streaming, fnapi
    options.setExperiments(ImmutableList.of("experiment1", "beam_fn_api"));
    assertThat(getContainerImageForJob(options), equalTo("gcr.io/java/foo"));
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Test(org.junit.Test)

Example 65 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testTemplateRunnerLoggedErrorForFile.

/**
   * Tests that the {@link DataflowRunner} with {@code --templateLocation} throws the appropriate
   * exception when an output file is not writable.
   */
@Test
public void testTemplateRunnerLoggedErrorForFile() throws Exception {
    DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    options.setJobName("TestJobName");
    options.setRunner(DataflowRunner.class);
    options.setTemplateLocation("//bad/path");
    options.setProject("test-project");
    options.setTempLocation(tmpFolder.getRoot().getPath());
    options.setGcpCredential(new TestCredential());
    options.setPathValidatorClass(NoopPathValidator.class);
    Pipeline p = Pipeline.create(options);
    thrown.expectMessage("Cannot create output file at");
    thrown.expect(RuntimeException.class);
    p.run();
}
Also used : TestCredential(org.apache.beam.sdk.extensions.gcp.auth.TestCredential) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)75 Test (org.junit.Test)66 Pipeline (org.apache.beam.sdk.Pipeline)39 Job (com.google.api.services.dataflow.model.Job)31 DataflowPackage (com.google.api.services.dataflow.model.DataflowPackage)22 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)14 TestCredential (org.apache.beam.sdk.extensions.gcp.auth.TestCredential)14 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)14 DataflowRunner.getContainerImageForJob (org.apache.beam.runners.dataflow.DataflowRunner.getContainerImageForJob)10 Step (com.google.api.services.dataflow.model.Step)8 Matchers.containsString (org.hamcrest.Matchers.containsString)7 Matchers.anyString (org.mockito.Matchers.anyString)7 ImmutableMap (com.google.common.collect.ImmutableMap)6 Map (java.util.Map)6 Structs.addObject (org.apache.beam.runners.dataflow.util.Structs.addObject)6 ImmutableList (com.google.common.collect.ImmutableList)4 ArrayList (java.util.ArrayList)4 LinkedList (java.util.LinkedList)4 List (java.util.List)4 TupleTagList (org.apache.beam.sdk.values.TupleTagList)4