Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The class DataflowPipelineTranslatorTest, method runStreamingGroupIntoBatchesAndGetJobSpec.
private JobSpecification runStreamingGroupIntoBatchesAndGetJobSpec(
    boolean withShardedKey, List<String> experiments) throws IOException {
  // Configure streaming pipeline options with the requested experiments.
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(experiments);
  options.setStreaming(true);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  // Build a small keyed input and apply GroupIntoBatches, optionally with sharded keys.
  Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<Integer, String>> input =
      pipeline.apply(Create.of(Arrays.asList(KV.of(1, "1"), KV.of(2, "2"), KV.of(3, "3"))));
  if (withShardedKey) {
    input.apply(GroupIntoBatches.<Integer, String>ofSize(3).withShardedKey());
  } else {
    input.apply(GroupIntoBatches.ofSize(3));
  }

  // Run the runner's transform replacements, then translate the proto into a job spec.
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceV1Transforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  return translator.translate(
      pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList());
}
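A hedged usage sketch for the helper above: the test name, the experiment flag, and the step-count assertion are illustrative assumptions, not taken from the real test class.

@Test
public void testStreamingGroupIntoBatchesWithShardedKey() throws IOException {
  // "enable_streaming_engine" is a placeholder experiment for illustration.
  JobSpecification jobSpec =
      runStreamingGroupIntoBatchesAndGetJobSpec(
          true, Arrays.asList("enable_streaming_engine"));
  // Translation should succeed and yield a non-empty step graph.
  assertFalse(jobSpec.getJob().getSteps().isEmpty());
}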
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The class DataflowPipelineTranslatorTest, method testWorkerMachineTypeConfig.
@Test
public void testWorkerMachineTypeConfig() throws IOException {
  final String testMachineType = "test-machine-type";
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setWorkerMachineType(testMachineType);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p, pipelineProto, sdkComponents,
              DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  // The configured machine type should propagate to the job's single worker pool.
  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  WorkerPool workerPool = job.getEnvironment().getWorkerPools().get(0);
  assertEquals(testMachineType, workerPool.getMachineType());
}
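The buildPipelineOptions helper is not part of this excerpt. A minimal sketch of what such a helper typically looks like, assuming placeholder project and temp-location values and Beam's TestCredential to keep the test offline:

private static DataflowPipelineOptions buildPipelineOptions() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setProject("some-project"); // placeholder project id
  options.setTempLocation("gs://somebucket/some/path"); // placeholder GCS path
  options.setGcpCredential(new TestCredential()); // avoids real authentication in tests
  return options;
}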
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The class DataflowPipelineTranslatorTest, method testGoodWildcards.
/**
 * Tests a few GCS wildcard corner cases that should not crash translation.
 */
@Test
public void testGoodWildcards() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline pipeline = Pipeline.create(options);
  DataflowPipelineTranslator t = DataflowPipelineTranslator.fromOptions(options);

  applyRead(pipeline, "gs://bucket/foo");
  applyRead(pipeline, "gs://bucket/foo/");
  applyRead(pipeline, "gs://bucket/foo/*");
  applyRead(pipeline, "gs://bucket/foo/?");
  applyRead(pipeline, "gs://bucket/foo/[0-9]");
  applyRead(pipeline, "gs://bucket/foo/*baz*");
  applyRead(pipeline, "gs://bucket/foo/*baz?");
  applyRead(pipeline, "gs://bucket/foo/[0-9]baz?");
  applyRead(pipeline, "gs://bucket/foo/baz/*");
  applyRead(pipeline, "gs://bucket/foo/baz/*wonka*");
  applyRead(pipeline, "gs://bucket/foo/*baz/wonka*");
  applyRead(pipeline, "gs://bucket/foo*/baz");
  applyRead(pipeline, "gs://bucket/foo?/baz");
  applyRead(pipeline, "gs://bucket/foo[0-9]/baz");

  // Check that translation doesn't fail.
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  JobSpecification jobSpecification =
      t.translate(
          pipeline, pipelineProto, sdkComponents,
          DataflowRunner.fromOptions(options), Collections.emptyList());
  assertAllStepOutputsHaveUniqueIds(jobSpecification.getJob());
}
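The applyRead helper is also outside this excerpt. A plausible sketch, assuming it simply applies a TextIO read named after the path so each application stays unique:

private void applyRead(Pipeline pipeline, String path) {
  // Distinct transform names keep repeated reads from colliding in the pipeline graph.
  pipeline.apply("Read(" + path + ")", TextIO.read().from(path));
}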
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The class DataflowPipelineTranslatorTest, method testDiskSizeGbConfig.
@Test
public void testDiskSizeGbConfig() throws IOException {
  final Integer diskSizeGb = 1234;
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setDiskSizeGb(diskSizeGb);

  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p, pipelineProto, sdkComponents,
              DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  // The configured disk size should propagate to the job's single worker pool.
  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertEquals(diskSizeGb, job.getEnvironment().getWorkerPools().get(0).getDiskSizeGb());
}
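The same pattern extends to other worker pool settings. A hedged sketch for disk type, assuming the translator copies workerDiskType into the worker pool the same way it copies disk size; the "pd-ssd" value and test name are illustrative:

@Test
public void testDiskTypeConfig() throws IOException {
  final String diskType = "pd-ssd"; // illustrative disk type value
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setWorkerDiskType(diskType);
  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(
              p, pipelineProto, sdkComponents,
              DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();
  // Assumes the translator forwards the disk type to the worker pool unchanged.
  assertEquals(diskType, job.getEnvironment().getWorkerPools().get(0).getDiskType());
}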
Use of org.apache.beam.model.pipeline.v1.RunnerApi.Pipeline in project beam by apache.
The class DataflowPipelineTranslatorTest, method runBatchGroupIntoBatchesAndGetJobSpec.
private JobSpecification runBatchGroupIntoBatchesAndGetJobSpec(
    boolean withShardedKey, List<String> experiments) throws IOException {
  // Configure batch (non-streaming) pipeline options with the requested experiments.
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(experiments);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  // Build a small keyed input and apply GroupIntoBatches, optionally with sharded keys.
  Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<Integer, String>> input =
      pipeline.apply(Create.of(Arrays.asList(KV.of(1, "1"), KV.of(2, "2"), KV.of(3, "3"))));
  if (withShardedKey) {
    input.apply(GroupIntoBatches.<Integer, String>ofSize(3).withShardedKey());
  } else {
    input.apply(GroupIntoBatches.ofSize(3));
  }

  // Run the runner's transform replacements, then translate the proto into a job spec.
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceV1Transforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  return translator.translate(
      pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList());
}
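A matching usage sketch for the batch helper; the empty experiment list, the test name, and the assertion are assumptions for illustration:

@Test
public void testBatchGroupIntoBatchesWithShardedKey() throws IOException {
  JobSpecification jobSpec =
      runBatchGroupIntoBatchesAndGetJobSpec(true, Collections.emptyList());
  // Batch translation should also produce a non-empty step graph.
  assertFalse(jobSpec.getJob().getSteps().isEmpty());
}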