Usage of org.apache.beam.runners.dataflow.DataflowPipelineTranslator in the Apache Beam project.
Example: the method translateIOToCloudSource from the class WorkerCustomSourcesTest.
/**
 * Translates a {@link BoundedSource} into the Dataflow cloud {@code Source} representation
 * by building a minimal pipeline around it and running the Dataflow pipeline translator.
 *
 * @param io the bounded source to translate
 * @param options pipeline options used both to create the pipeline and the translator
 * @return the cloud {@code Source} extracted from the first step of the translated job
 * @throws Exception if pipeline translation fails
 */
static com.google.api.services.dataflow.model.Source translateIOToCloudSource(BoundedSource<?> io, DataflowPipelineOptions options) throws Exception {
  DataflowPipelineTranslator pipelineTranslator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline pipeline = Pipeline.create(options);
  pipeline.begin().apply(Read.from(io));
  // The DataflowRunner normally performs this override itself, but it offers no way to
  // apply the replacements without actually executing the pipeline — so do it here.
  pipeline.replaceAll(Collections.singletonList(SplittableParDo.PRIMITIVE_BOUNDED_READ_OVERRIDE));
  DataflowRunner dataflowRunner = DataflowRunner.fromOptions(options);
  // Register a placeholder docker environment; translation requires a default environment
  // even though no container is ever launched in this test path.
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("dummy-image-url"));
  RunnerApi.Pipeline protoPipeline = PipelineTranslation.toProto(pipeline, components, true);
  Job job =
      pipelineTranslator
          .translate(pipeline, protoPipeline, components, dataflowRunner, new ArrayList<DataflowPackage>())
          .getJob();
  // The Read transform becomes the first (and only) step of the translated job.
  return stepToCloudSource(job.getSteps().get(0));
}
Aggregations