Use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.
In the class DataflowRunnerTest, method testRun.
@Test
public void testRun() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    Pipeline p = buildDataflowPipeline(options);
    DataflowPipelineJob job = (DataflowPipelineJob) p.run();
    assertEquals("newid", job.getJobId());

    ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
    Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
    assertValidJob(jobCaptor.getValue());
}
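The test body depends on helpers defined elsewhere in DataflowRunnerTest (buildPipelineOptions appears further down in this listing; buildDataflowPipeline and assertValidJob do not appear in this excerpt). A minimal sketch of what buildDataflowPipeline might look like — the TextIO read/write chain is an assumption; any small transform chain gives the runner something to translate into a Job:

import org.apache.beam.runners.dataflow.DataflowRunner;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;

// Hypothetical helper; the real test class defines its own version.
private static Pipeline buildDataflowPipeline(DataflowPipelineOptions options) {
    options.setRunner(DataflowRunner.class);
    Pipeline p = Pipeline.create(options);
    // Any trivial transform chain is enough for the runner to build a Job.
    p.apply("ReadMyFile", TextIO.read().from("gs://bucket/object"))
        .apply("WriteMyFile", TextIO.write().to("gs://bucket/object"));
    return p;
}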
Use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.
In the class DataflowRunnerTest, method testRunWithFiles.
@Test
public void testRunWithFiles() throws IOException {
    // Test that the function DataflowRunner.stageFiles works as expected.
    final String cloudDataflowDataset = "somedataset";
    // Create some temporary files.
    File temp1 = File.createTempFile("DataflowRunnerTest", "txt");
    temp1.deleteOnExit();
    File temp2 = File.createTempFile("DataflowRunnerTest2", "txt");
    temp2.deleteOnExit();

    String overridePackageName = "alias.txt";
    when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
        .thenReturn(ImmutableList.of(
            GcsUtil.StorageObjectOrIOException.create(new FileNotFoundException("some/path"))));

    DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    options.setFilesToStage(ImmutableList.of(
        temp1.getAbsolutePath(), overridePackageName + "=" + temp2.getAbsolutePath()));
    options.setStagingLocation(VALID_STAGING_BUCKET);
    options.setTempLocation(VALID_TEMP_BUCKET);
    options.setTempDatasetId(cloudDataflowDataset);
    options.setProject(PROJECT_ID);
    options.setRegion(REGION_ID);
    options.setJobName("job");
    options.setDataflowClient(buildMockDataflow());
    options.setGcsUtil(mockGcsUtil);
    options.setGcpCredential(new TestCredential());

    when(mockGcsUtil.create(any(GcsPath.class), anyString(), anyInt()))
        .then(new Answer<SeekableByteChannel>() {
            @Override
            public SeekableByteChannel answer(InvocationOnMock invocation) throws Throwable {
                return FileChannel.open(
                    Files.createTempFile("channel-", ".tmp"),
                    StandardOpenOption.CREATE, StandardOpenOption.DELETE_ON_CLOSE);
            }
        });

    Pipeline p = buildDataflowPipeline(options);
    DataflowPipelineJob job = (DataflowPipelineJob) p.run();
    assertEquals("newid", job.getJobId());

    ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
    Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
    Job workflowJob = jobCaptor.getValue();
    assertValidJob(workflowJob);

    assertEquals(2, workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().size());
    DataflowPackage workflowPackage1 =
        workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().get(0);
    assertThat(workflowPackage1.getName(), startsWith(temp1.getName()));
    DataflowPackage workflowPackage2 =
        workflowJob.getEnvironment().getWorkerPools().get(0).getPackages().get(1);
    assertEquals(overridePackageName, workflowPackage2.getName());
    assertEquals(
        GcsPath.fromUri(VALID_TEMP_BUCKET).toResourceName(),
        workflowJob.getEnvironment().getTempStoragePrefix());
    assertEquals(cloudDataflowDataset, workflowJob.getEnvironment().getDataset());
    assertEquals(
        ReleaseInfo.getReleaseInfo().getName(),
        workflowJob.getEnvironment().getUserAgent().get("name"));
    assertEquals(
        ReleaseInfo.getReleaseInfo().getVersion(),
        workflowJob.getEnvironment().getUserAgent().get("version"));
}
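Note the second filesToStage entry: the "name=path" syntax stages a local file under an explicit name rather than its own file name, which is exactly what the workflowPackage2 assertion verifies. A minimal sketch of the same syntax in ordinary, non-mocked option setup — the helper name, bucket, and file paths below are placeholders:

import com.google.common.collect.ImmutableList;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

// Hypothetical helper illustrating the filesToStage override syntax.
private static DataflowPipelineOptions stagingOptionsSketch() {
    DataflowPipelineOptions opts = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    // Plain entries are staged under their own file names;
    // "name=path" entries are staged under the given name instead.
    opts.setFilesToStage(ImmutableList.of(
        "/path/to/my-pipeline.jar", // staged as my-pipeline.jar
        "settings.txt=/path/to/local-settings.txt")); // staged as settings.txt
    opts.setStagingLocation("gs://my-bucket/staging"); // placeholder bucket
    return opts;
}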
Use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.
In the class DataflowRunnerTest, method buildPipelineOptions.
private DataflowPipelineOptions buildPipelineOptions() throws IOException {
    DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    options.setRunner(DataflowRunner.class);
    options.setProject(PROJECT_ID);
    options.setTempLocation(VALID_TEMP_BUCKET);
    options.setRegion(REGION_ID);
    // Set FILES_PROPERTY to empty to prevent a default value calculated from the classpath.
    options.setFilesToStage(new LinkedList<String>());
    options.setDataflowClient(buildMockDataflow());
    options.setGcsUtil(mockGcsUtil);
    options.setGcpCredential(new TestCredential());
    // Configure the FileSystem registrar to use these options.
    FileSystems.setDefaultPipelineOptions(options);
    return options;
}
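This helper wires in mocks (buildMockDataflow, mockGcsUtil, TestCredential) so no calls leave the test. For contrast, a sketch of how non-test code usually builds the same options from command-line arguments; the helper name and the flag values in the comment are placeholders:

import org.apache.beam.runners.dataflow.DataflowRunner;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

// Hypothetical production-side counterpart to buildPipelineOptions.
public static DataflowPipelineOptions optionsFromArgs(String[] args) {
    // Typically invoked with flags such as:
    //   --project=my-gcp-project --region=us-central1 --tempLocation=gs://my-bucket/temp
    DataflowPipelineOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(DataflowPipelineOptions.class);
    options.setRunner(DataflowRunner.class);
    return options;
}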
Use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.
In the class DataflowRunnerTest, method testNonGcsTempLocation.
@Test
public void testNonGcsTempLocation() throws IOException {
    DataflowPipelineOptions options = buildPipelineOptions();
    options.setTempLocation("file://temp/location");
    thrown.expect(IllegalArgumentException.class);
    thrown.expectMessage(
        "DataflowRunner requires gcpTempLocation, but failed to retrieve a value from PipelineOptions");
    DataflowRunner.fromOptions(options);
}
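The inverse case is worth stating: fromOptions succeeds when the temp location is a gs:// path, or when gcpTempLocation is set directly. A sketch reusing the helper above; the test name and the explicit bucket are placeholders:

@Test
public void testGcsTempLocationSketch() throws IOException {
    // Hypothetical counterpart to testNonGcsTempLocation: VALID_TEMP_BUCKET
    // is a gs:// path, so gcpTempLocation resolves and no exception is thrown.
    DataflowPipelineOptions options = buildPipelineOptions();
    DataflowRunner.fromOptions(options);

    // gcpTempLocation can also be set explicitly rather than derived from tempLocation.
    options.setGcpTempLocation("gs://some-bucket/temp"); // placeholder bucket
    DataflowRunner.fromOptions(options);
}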
Use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.
In the class DataflowGroupByKeyTest, method createTestServiceRunner.
/**
 * Create a test pipeline that uses the {@link DataflowRunner} so that {@link GroupByKey}
 * is not expanded. This is used for verifying that even without expansion the proper errors
 * show up.
 */
private Pipeline createTestServiceRunner() {
    DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
    options.setRunner(DataflowRunner.class);
    options.setProject("someproject");
    options.setGcpTempLocation("gs://staging");
    options.setPathValidatorClass(NoopPathValidator.class);
    options.setDataflowClient(dataflow);
    return Pipeline.create(options);
}
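A sketch of how a test might use this helper, assuming the class has a JUnit ExpectedException rule named thrown like the runner tests above: applying GroupByKey to an unbounded input in the GlobalWindow with no trigger should fail at pipeline construction time, before anything reaches the service. The transform chain here is illustrative, not the actual test body.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.GenerateSequence;
import org.apache.beam.sdk.transforms.GroupByKey;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.TypeDescriptors;
import org.junit.Test;

@Test
public void testGroupByKeyUnboundedSketch() {
    Pipeline p = createTestServiceRunner();
    thrown.expect(IllegalStateException.class);
    // GenerateSequence with no upper bound yields an unbounded PCollection;
    // GroupByKey must reject it in the GlobalWindow without a trigger.
    p.apply(GenerateSequence.from(0))
        .apply(MapElements
            .into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.longs()))
            .via((Long x) -> KV.of("key", x)))
        .apply(GroupByKey.create());
}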