Search in sources :

Example 6 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testApplyIsScopedToExactClass.

@Test
public void testApplyIsScopedToExactClass() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    Pipeline p = Pipeline.create(options);
    Create.TimestampedValues<String> transform = Create.timestamped(Arrays.asList(TimestampedValue.of("TestString", Instant.now())));
    p.apply(transform);
    CompositeTransformRecorder recorder = new CompositeTransformRecorder();
    p.traverseTopologically(recorder);
    // The recorder will also have seen a Create.Values composite as well, but we can't obtain that
    // transform.
    assertThat("Expected to have seen CreateTimestamped composite transform.", recorder.getCompositeTransforms(), hasItem(transform));
    assertThat("Expected to have two composites, CreateTimestamped and Create.Values", recorder.getCompositeTransforms(), hasItem(Matchers.<PTransform<?, ?>>isA((Class) Create.Values.class)));
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Create(org.apache.beam.sdk.transforms.Create) Matchers.containsString(org.hamcrest.Matchers.containsString) Matchers.anyString(org.mockito.Matchers.anyString) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) PTransform(org.apache.beam.sdk.transforms.PTransform) Test(org.junit.Test)

Example 7 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testNonExistentTempLocation.

@Test
public void testNonExistentTempLocation() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setGcpTempLocation(NON_EXISTENT_BUCKET);
    thrown.expect(IllegalArgumentException.class);
    thrown.expectMessage(containsString("Output path does not exist or is not writeable: " + NON_EXISTENT_BUCKET));
    DataflowRunner.fromOptions(options);
    ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
    Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
    assertValidJob(jobCaptor.getValue());
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Job(com.google.api.services.dataflow.model.Job) DataflowRunner.getContainerImageForJob(org.apache.beam.runners.dataflow.DataflowRunner.getContainerImageForJob) Test(org.junit.Test)

Example 8 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testInvalidStagingLocation.

@Test
public void testInvalidStagingLocation() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setStagingLocation("file://my/staging/location");
    try {
        DataflowRunner.fromOptions(options);
        fail("fromOptions should have failed");
    } catch (IllegalArgumentException e) {
        assertThat(e.getMessage(), containsString("Expected a valid 'gs://' path but was given"));
    }
    options.setStagingLocation("my/staging/location");
    try {
        DataflowRunner.fromOptions(options);
        fail("fromOptions should have failed");
    } catch (IllegalArgumentException e) {
        assertThat(e.getMessage(), containsString("Expected a valid 'gs://' path but was given"));
    }
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Test(org.junit.Test)

Example 9 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testInvalidJobName.

@Test
public void testInvalidJobName() throws IOException {
    List<String> invalidNames = Arrays.asList("invalid_name", "0invalid", "invalid-");
    List<String> expectedReason = Arrays.asList("JobName invalid", "JobName invalid", "JobName invalid");
    for (int i = 0; i < invalidNames.size(); ++i) {
        DataflowPipelineOptions options = buildPipelineOptions();
        options.setJobName(invalidNames.get(i));
        try {
            DataflowRunner.fromOptions(options);
            fail("Expected IllegalArgumentException for jobName " + options.getJobName());
        } catch (IllegalArgumentException e) {
            assertThat(e.getMessage(), containsString(expectedReason.get(i)));
        }
    }
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Matchers.containsString(org.hamcrest.Matchers.containsString) Matchers.anyString(org.mockito.Matchers.anyString) Test(org.junit.Test)

Example 10 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowPipelineTranslatorTest method testMaxNumWorkersIsPassedWhenNoAlgorithmIsSet.

@Test
public void testMaxNumWorkersIsPassedWhenNoAlgorithmIsSet() throws IOException {
    final DataflowPipelineWorkerPoolOptions.AutoscalingAlgorithmType noScaling = null;
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setMaxNumWorkers(42);
    options.setAutoscalingAlgorithm(noScaling);
    Pipeline p = buildPipeline(options);
    p.traverseTopologically(new RecordingPipelineVisitor());
    Job job = DataflowPipelineTranslator.fromOptions(options).translate(p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList()).getJob();
    assertEquals(1, job.getEnvironment().getWorkerPools().size());
    assertNull(job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getAlgorithm());
    assertEquals(42, job.getEnvironment().getWorkerPools().get(0).getAutoscalingSettings().getMaxNumWorkers().intValue());
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Job(com.google.api.services.dataflow.model.Job) DataflowPackage(com.google.api.services.dataflow.model.DataflowPackage) DataflowPipelineWorkerPoolOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineWorkerPoolOptions) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Aggregations

DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)75 Test (org.junit.Test)66 Pipeline (org.apache.beam.sdk.Pipeline)39 Job (com.google.api.services.dataflow.model.Job)31 DataflowPackage (com.google.api.services.dataflow.model.DataflowPackage)22 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)14 TestCredential (org.apache.beam.sdk.extensions.gcp.auth.TestCredential)14 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)14 DataflowRunner.getContainerImageForJob (org.apache.beam.runners.dataflow.DataflowRunner.getContainerImageForJob)10 Step (com.google.api.services.dataflow.model.Step)8 Matchers.containsString (org.hamcrest.Matchers.containsString)7 Matchers.anyString (org.mockito.Matchers.anyString)7 ImmutableMap (com.google.common.collect.ImmutableMap)6 Map (java.util.Map)6 Structs.addObject (org.apache.beam.runners.dataflow.util.Structs.addObject)6 ImmutableList (com.google.common.collect.ImmutableList)4 ArrayList (java.util.ArrayList)4 LinkedList (java.util.LinkedList)4 List (java.util.List)4 TupleTagList (org.apache.beam.sdk.values.TupleTagList)4