Use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.
Class DataflowPipelineTranslatorTest, method testGoodWildcards.
/**
* This tests a few corner cases that should not crash.
*/
@Test
public void testGoodWildcards() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline pipeline = Pipeline.create(options);
  DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options);
  applyRead(pipeline, "gs://bucket/foo");
  applyRead(pipeline, "gs://bucket/foo/");
  applyRead(pipeline, "gs://bucket/foo/*");
  applyRead(pipeline, "gs://bucket/foo/?");
  applyRead(pipeline, "gs://bucket/foo/[0-9]");
  applyRead(pipeline, "gs://bucket/foo/*baz*");
  applyRead(pipeline, "gs://bucket/foo/*baz?");
  applyRead(pipeline, "gs://bucket/foo/[0-9]baz?");
  applyRead(pipeline, "gs://bucket/foo/baz/*");
  applyRead(pipeline, "gs://bucket/foo/baz/*wonka*");
  applyRead(pipeline, "gs://bucket/foo/*baz/wonka*");
  applyRead(pipeline, "gs://bucket/foo*/baz");
  applyRead(pipeline, "gs://bucket/foo?/baz");
  applyRead(pipeline, "gs://bucket/foo[0-9]/baz");
  // Check that translation doesn't fail.
  JobSpecification jobSpecification =
      t.translate(pipeline, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList());
  assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob());
}
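The applyRead helper called above is not reproduced on this page. A minimal sketch consistent with how it is used here, assuming each glob is handed to TextIO as a read source (the helper's exact body is an assumption, not the verbatim Beam source):

// Sketch of the assumed helper: apply a read of the given GCS glob so that
// translation has a concrete source transform to process.
private void applyRead(Pipeline pipeline, String path) {
  pipeline.apply("Read(" + path + ")", TextIO.read().from(path));
}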
Use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.
Class DataflowPipelineTranslatorTest, method testBatchStatefulParDoTranslation.
/**
* Smoke test to fail fast if translation of a stateful ParDo
* in batch breaks.
*/
@Test
public void testBatchStatefulParDoTranslation() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline pipeline = Pipeline.create(options);
  TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {};
  pipeline
      .apply(Create.of(KV.of(1, 1)))
      .apply(
          ParDo.of(
                  new DoFn<KV<Integer, Integer>, Integer>() {

                    @StateId("unused")
                    final StateSpec<ValueState<Integer>> stateSpec =
                        StateSpecs.value(VarIntCoder.of());

                    @ProcessElement
                    public void process(ProcessContext c) {
                      // noop
                    }
                  })
              .withOutputTags(mainOutputTag, TupleTagList.empty()));
  runner.replaceTransforms(pipeline);
  Job job = translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList()).getJob();
  // The job should look like:
  // 0. ParallelRead (Create)
  // 1. ParDo (ReifyWVs)
  // 2. GroupByKeyAndSortValuesOnly
  // 3. A ParDo over grouped and sorted KVs that is executed via service-side ungrouping
  List<Step> steps = job.getSteps();
  assertEquals(4, steps.size());
  Step createStep = steps.get(0);
  assertEquals("ParallelRead", createStep.getKind());
  Step reifyWindowedValueStep = steps.get(1);
  assertEquals("ParallelDo", reifyWindowedValueStep.getKind());
  Step gbkStep = steps.get(2);
  assertEquals("GroupByKey", gbkStep.getKind());
  Step statefulParDoStep = steps.get(3);
  assertEquals("ParallelDo", statefulParDoStep.getKind());
  assertThat(
      (String) statefulParDoStep.getProperties().get(PropertyNames.USES_KEYED_STATE),
      not(equalTo("true")));
}
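The final assertion reflects the structure described in the step-list comment: in batch, the state access is served off the grouped-and-sorted input produced by the preceding GroupByKey step, so the translated ParDo does not advertise streaming keyed state. When a structural assertion like the step count fails, it can help to dump what the translator actually produced. A small sketch, assuming PropertyNames.USER_NAME is the property key for the user-visible step name (an assumption, not shown in this test):

// Debugging aid (sketch): print each translated step's kind and user name.
for (Step step : job.getSteps()) {
  System.out.println(
      step.getKind() + " -> " + step.getProperties().get(PropertyNames.USER_NAME));
}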
Use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.
Class DataflowPipelineTranslatorTest, method testNetworkConfigMissing.
@Test
public void testNetworkConfigMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
          .getJob();
  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertNull(job.getEnvironment().getWorkerPools().get(0).getNetwork());
}
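For contrast, a sketch of the positive case, using the same helpers as above; setNetwork is a real DataflowPipelineOptions setter (inherited from the worker-pool options), but the companion test itself is not shown on this page and the network name here is illustrative:

// Sketch of the complementary case: a configured network should appear
// on the translated worker pool.
DataflowPipelineOptions options = buildPipelineOptions();
options.setNetwork("some-network");
Pipeline p = buildPipeline(options);
p.traverseTopologically(new RecordingPipelineVisitor());
Job job =
    DataflowPipelineTranslator.fromOptions(options)
        .translate(p, DataflowRunner.fromOptions(options), Collections.<DataflowPackage>emptyList())
        .getJob();
assertEquals("some-network", job.getEnvironment().getWorkerPools().get(0).getNetwork());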
Use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.
Class DataflowRunnerTest, method testUpdate.
@Test
public void testUpdate() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setUpdate(true);
  options.setJobName("oldJobName");
  Pipeline p = buildDataflowPipeline(options);
  DataflowPipelineJob job = (DataflowPipelineJob) p.run();
  assertEquals("newid", job.getJobId());
  ArgumentCaptor<Job> jobCaptor = ArgumentCaptor.forClass(Job.class);
  Mockito.verify(mockJobs).create(eq(PROJECT_ID), eq(REGION_ID), jobCaptor.capture());
  assertValidJob(jobCaptor.getValue());
}
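Outside of a test, the same update options are typically supplied as command-line flags rather than via setters. A sketch of the equivalent construction, relying on PipelineOptionsFactory's standard setter-to-flag name convention:

// Equivalent options built from command-line style args (sketch).
DataflowPipelineOptions fromArgs =
    PipelineOptionsFactory.fromArgs("--update", "--jobName=oldJobName")
        .as(DataflowPipelineOptions.class);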
Use of org.apache.beam.runners.dataflow.options.DataflowPipelineOptions in project beam by apache.
Class DataflowRunnerTest, method testTemplateRunnerFullCompletion.
/**
 * Tests that the {@link DataflowRunner} with {@code --templateLocation} returns normally
 * when the runner is successfully run.
 */
@Test
public void testTemplateRunnerFullCompletion() throws Exception {
  File existingFile = tmpFolder.newFile();
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setJobName("TestJobName");
  options.setGcpCredential(new TestCredential());
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setProject("test-project");
  options.setRunner(DataflowRunner.class);
  options.setTemplateLocation(existingFile.getPath());
  options.setTempLocation(tmpFolder.getRoot().getPath());
  Pipeline p = Pipeline.create(options);
  p.run();
  expectedLogs.verifyInfo("Template successfully created");
}
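With templateLocation set, run() stages the serialized job specification to the given file instead of submitting the job to the service. A hedged follow-up check, not part of the original test, that could confirm the staging actually wrote something:

// Sketch: the template is the serialized job spec, so the target file
// should be non-empty after p.run() returns.
assertTrue("expected a non-empty template file", existingFile.length() > 0);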