Search in sources :

Example 51 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testRun.

@Test
public void testRun() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    Pipeline p = buildDataflowPipeline(options);
    DataflowPipelineJob job = (DataflowPipelineJob) p.run();
    assertEquals("newid", job.getJobId());
    ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
    Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
    assertValidJob(jobCaptor.getValue());
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Job(com.google.api.services.dataflow.model.Job) DataflowRunner.getContainerImageForJob(org.apache.beam.runners.dataflow.DataflowRunner.getContainerImageForJob) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 52 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testRunWithFiles.

@Test
public void testRunWithFiles() throws IOException {
    // Test that the function DataflowRunner.stageFiles works as expected.
    final String cloudDataflowDataset = "somedataset";
    // Create some temporary files.
    File temp1 = File.createTempFile("DataflowRunnerTest", "txt");
    temp1.deleteOnExit();
    File temp2 = File.createTempFile("DataflowRunnerTest2", "txt");
    temp2.deleteOnExit();
    String overridePackageName = "alias.txt";
    when(mockGcsUtil.getObjects(anyListOf(GcsPath.class))).thenReturn(ImmutableList.of(GcsUtil.StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));
    DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    options.setFilesToStage(ImmutableList.of(temp1.getAbsolutePath(), overridePackageName + "=" + temp2.getAbsolutePath()));
    options.setStagingLocation(VALID_STAGING_BUCKET);
    options.setTempLocation(VALID_TEMP_BUCKET);
    options.setTempDatasetId(cloudDataflowDataset);
    options.setProject(PROJECT_ID);
    options.setRegion(REGION_ID);
    options.setJobName("job");
    options.setDataflowClient(buildMockDataflow());
    options.setGcsUtil(mockGcsUtil);
    options.setGcpCredential(new TestCredential());
    when(mockGcsUtil.create(any(GcsPath.class), anyString(), anyInt())).then(new Answer<SeekableByteChannel>() {

        @Override
        public SeekableByteChannel answer(InvocationOnMock invocation) throws Throwable {
            return FileChannel.open(Files.createTempFile("channel-", ".tmp"), StandardOpenOption.CREATE, StandardOpenOption.DELETE_ON_CLOSE);
        }
    });
    Pipeline p = buildDataflowPipeline(options);
    DataflowPipelineJob job = (DataflowPipelineJob) p.run();
    assertEquals("newid", job.getJobId());
    ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
    Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
    Job workflowJob = jobCaptor.getValue();
    assertValidJob(workflowJob);
    assertEquals(2, workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().size());
    DataflowPackage workflowPackage1 = workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().get(0);
    assertThat(workflowPackage1.getName(), startsWith(temp1.getName()));
    DataflowPackage workflowPackage2 = workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().get(1);
    assertEquals(overridePackageName, workflowPackage2.getName());
    assertEquals(GcsPath.fromUri(VALID_TEMP_BUCKET).toResourceName(), workflowJob.getEnvironment().getTempStoragePrefix());
    assertEquals(cloudDataflowDataset, workflowJob.getEnvironment().getDataset());
    assertEquals(ReleaseInfo.getReleaseInfo().getName(), workflowJob.getEnvironment().getUserAgent().get("name"));
    assertEquals(ReleaseInfo.getReleaseInfo().getVersion(), workflowJob.getEnvironment().getUserAgent().get("version"));
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) FileNotFoundException(java.io.FileNotFoundException) Matchers.containsString(org.hamcrest.Matchers.containsString) Matchers.anyString(org.mockito.Matchers.anyString) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) SeekableByteChannel(java.nio.channels.SeekableByteChannel) TestCredential(org.apache.beam.sdk.extensions.gcp.auth.TestCredential) InvocationOnMock(org.mockito.invocation.InvocationOnMock) GcsPath(org.apache.beam.sdk.util.gcsfs.GcsPath) Job(com.google.api.services.dataflow.model.Job) DataflowRunner.getContainerImageForJob(org.apache.beam.runners.dataflow.DataflowRunner.getContainerImageForJob) File(java.io.File) DataflowPackage(com.google.api.services.dataflow.model.DataflowPackage) Test(org.junit.Test)

Example 53 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method buildPipelineOptions.

private DataflowPipelineOptions buildPipelineOptions() throws IOException {
    DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    options.setRunner(DataflowRunner.class);
    options.setProject(PROJECT_ID);
    options.setTempLocation(VALID_TEMP_BUCKET);
    options.setRegion(REGION_ID);
    // Set FILES_PROPERTY to empty to prevent a default value calculated from classpath.
    options.setFilesToStage(new LinkedList<String>());
    options.setDataflowClient(buildMockDataflow());
    options.setGcsUtil(mockGcsUtil);
    options.setGcpCredential(new TestCredential());
    // Configure the FileSystem registrar to use these options.
    FileSystems.setDefaultPipelineOptions(options);
    return options;
}
Also used : TestCredential(org.apache.beam.sdk.extensions.gcp.auth.TestCredential) DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Matchers.containsString(org.hamcrest.Matchers.containsString) Matchers.anyString(org.mockito.Matchers.anyString)

Example 54 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowRunnerTest method testNonGcsTempLocation.

@Test
public void testNonGcsTempLocation() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setTempLocation("file://temp/location");
    thrown.expect(IllegalArgumentException.class);
    thrown.expectMessage("DataflowRunner requires gcpTempLocation, " + "but failed to retrieve a value from PipelineOptions");
    DataflowRunner.fromOptions(options);
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions) Test(org.junit.Test)

Example 55 with DataflowPipelineOptions

use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.

the class DataflowGroupByKeyTest method createTestServiceRunner.

/**
   * Create a test pipeline that uses the {@link DataflowRunner} so that {@link GroupByKey}
   * is not expanded. This is used for verifying that even without expansion the proper errors show
   * up.
   */
private Pipeline createTestServiceRunner() {
    DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    options.setRunner(DataflowRunner.class);
    options.setProject("someproject");
    options.setGcpTempLocation("gs://staging");
    options.setPathValidatorClass(NoopPathValidator.class);
    options.setDataflowClient(dataflow);
    return Pipeline.create(options);
}
Also used : DataflowPipelineOptions(org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)

Aggregations

DataflowPipelineOptions (org.apache.beam.runners.dataflow.options.DataflowPipelineOptions)75 Test (org.junit.Test)66 Pipeline (org.apache.beam.sdk.Pipeline)39 Job (com.google.api.services.dataflow.model.Job)31 DataflowPackage (com.google.api.services.dataflow.model.DataflowPackage)22 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)14 TestCredential (org.apache.beam.sdk.extensions.gcp.auth.TestCredential)14 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)14 DataflowRunner.getContainerImageForJob (org.apache.beam.runners.dataflow.DataflowRunner.getContainerImageForJob)10 Step (com.google.api.services.dataflow.model.Step)8 Matchers.containsString (org.hamcrest.Matchers.containsString)7 Matchers.anyString (org.mockito.Matchers.anyString)7 ImmutableMap (com.google.common.collect.ImmutableMap)6 Map (java.util.Map)6 Structs.addObject (org.apache.beam.runners.dataflow.util.Structs.addObject)6 ImmutableList (com.google.common.collect.ImmutableList)4 ArrayList (java.util.ArrayList)4 LinkedList (java.util.LinkedList)4 List (java.util.List)4 TupleTagList (org.apache.beam.sdk.values.TupleTagList)4