Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.
From the class DataflowPipelineTranslatorTest, method createSdkComponents:
private SdkComponents createSdkComponents(PipelineOptions options) {
  SdkComponents sdkComponents = SdkComponents.create();
  // Resolve the worker container image for this job and register it as the
  // default Docker environment for the translated pipeline.
  String containerImageURL =
      DataflowRunner.getContainerImageForJob(options.as(DataflowPipelineOptions.class));
  RunnerApi.Environment defaultEnvironmentForDataflow =
      Environments.createDockerEnvironment(containerImageURL);
  sdkComponents.registerEnvironment(defaultEnvironmentForDataflow);
  return sdkComponents;
}
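Registering a default environment up front matters: PipelineTranslation.toProto needs an environment in the SdkComponents registry so that the transforms in the resulting Runner API proto can reference the SDK container that will execute them. The tests below all build their SdkComponents through this helper.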
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.
From the class DataflowPipelineTranslatorTest, method runBatchGroupIntoBatchesAndGetJobSpec:
private JobSpecification runBatchGroupIntoBatchesAndGetJobSpec(
    Boolean withShardedKey, List<String> experiments) throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(experiments);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<Integer, String>> input =
      pipeline.apply(Create.of(Arrays.asList(KV.of(1, "1"), KV.of(2, "2"), KV.of(3, "3"))));
  if (withShardedKey) {
    input.apply(GroupIntoBatches.<Integer, String>ofSize(3).withShardedKey());
  } else {
    input.apply(GroupIntoBatches.ofSize(3));
  }

  // Apply the runner's V1 transform overrides, convert the pipeline to its
  // Runner API proto, and translate that into a Dataflow job specification.
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceV1Transforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  return translator.translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList());
}
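A usage sketch (the caller below is hypothetical, not quoted from the test class): a test can invoke the helper with or without sharded keys and inspect the resulting job, using the same JobSpecification.getJob() accessor the other tests rely on.

JobSpecification specification =
    runBatchGroupIntoBatchesAndGetJobSpec(/* withShardedKey= */ false, Collections.emptyList());
Job job = specification.getJob();
// The translated job should contain steps for both the Create source and the
// GroupIntoBatches transform.
assertNotNull(job.getSteps());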
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.
From the class DataflowPipelineTranslatorTest, method testNetworkConfigMissing:
@Test
public void testNetworkConfigMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = buildPipeline(options);
  p.traverseTopologically(new RecordingPipelineVisitor());

  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  Job job =
      DataflowPipelineTranslator.fromOptions(options)
          .translate(p, pipelineProto, sdkComponents, DataflowRunner.fromOptions(options), Collections.emptyList())
          .getJob();

  // With no network configured in the options, the single worker pool's
  // network should be left unset.
  assertEquals(1, job.getEnvironment().getWorkerPools().size());
  assertNull(job.getEnvironment().getWorkerPools().get(0).getNetwork());
}
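The complementary positive case would look like the sketch below (hypothetical, not quoted from the test class; it assumes the setNetwork setter DataflowPipelineOptions inherits from its worker-pool options, and the "test-network" value is illustrative):

DataflowPipelineOptions options = buildPipelineOptions();
options.setNetwork("test-network"); // assumed setter, inherited from worker-pool options
Pipeline p = buildPipeline(options);
p.traverseTopologically(new RecordingPipelineVisitor());
SdkComponents sdkComponents = createSdkComponents(options);
RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
Job job =
    DataflowPipelineTranslator.fromOptions(options)
        .translate(p, pipelineProto, sdkComponents, DataflowRunner.fromOptions(options), Collections.emptyList())
        .getJob();
// The configured network should be propagated to the worker pool.
assertEquals("test-network", job.getEnvironment().getWorkerPools().get(0).getNetwork());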
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.
From the class DataflowPipelineTranslatorTest, method testTaggedNamesOverridden:
/**
 * Tests that, during translation, the names of the output collections of a multi-output ParDo -
 * a special case because the user can name the tags - are overridden to be what the Dataflow
 * service expects.
 */
@Test
public void testTaggedNamesOverridden() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline pipeline = Pipeline.create(options);

  TupleTag<Integer> tag1 = new TupleTag<Integer>("frazzle") {};
  TupleTag<Integer> tag2 = new TupleTag<Integer>("bazzle") {};
  TupleTag<Integer> tag3 = new TupleTag<Integer>() {};

  PCollectionTuple outputs =
      pipeline
          .apply(Create.of(3))
          .apply(
              ParDo.of(
                      new DoFn<Integer, Integer>() {
                        @ProcessElement
                        public void drop() {}
                      })
                  .withOutputTags(tag1, TupleTagList.of(tag2).and(tag3)));
  outputs.get(tag1).setName("bizbazzle");
  outputs.get(tag2).setName("gonzaggle");
  outputs.get(tag3).setName("froonazzle");

  runner.replaceV1Transforms(pipeline);
  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job = translator.translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList()).getJob();

  // The ParDo step
  Step step = job.getSteps().get(1);
  String stepName = getString(step.getProperties(), PropertyNames.USER_NAME);
  List<Map<String, Object>> outputInfos =
      Structs.getListOfMaps(step.getProperties(), PropertyNames.OUTPUT_INFO, null);
  assertThat(outputInfos.size(), equalTo(3));

  // The names set by the user _and_ the tags _must_ be ignored, or metrics will not show up.
  for (int i = 0; i < outputInfos.size(); ++i) {
    assertThat(
        getString(outputInfos.get(i), PropertyNames.USER_NAME),
        equalTo(String.format("%s.out%s", stepName, i)));
  }
}
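In other words, the Dataflow service addresses ParDo outputs positionally as stepName.outN; the final assertion confirms that translation rewrites both the user-assigned collection names and the tag ids into that pattern, which, per the in-code comment, is what keeps metric attribution working.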
Use of org.apache.beam.runners.core.construction.SdkComponents in project beam by apache.
From the class WindowMappingFnRunnerTest, method testWindowMapping:
@Test
public void testWindowMapping() throws Exception {
  String pTransformId = "pTransformId";
  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));

  RunnerApi.FunctionSpec functionSpec =
      RunnerApi.FunctionSpec.newBuilder()
          .setUrn(WindowMappingFnRunner.URN)
          .setPayload(
              ParDoTranslation.translateWindowMappingFn(
                      new GlobalWindows().getDefaultWindowMappingFn(), components)
                  .toByteString())
          .build();
  RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder().setSpec(functionSpec).build();

  ThrowingFunction<KV<Object, BoundedWindow>, KV<Object, BoundedWindow>> mapFunction =
      WindowMappingFnRunner.createMapFunctionForPTransform(pTransformId, pTransform);

  KV<Object, BoundedWindow> input =
      KV.of("abc", new IntervalWindow(Instant.now(), Duration.standardMinutes(1)));
  // GlobalWindows maps every main-input window to the single global window.
  assertEquals(KV.of(input.getKey(), GlobalWindow.INSTANCE), mapFunction.apply(input));
}
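The same harness generalizes to other window fns. Below is a hypothetical variant (a sketch, not from the original test; the names fixedComponents and fixedPTransformId are illustrative, and the expected mapping behavior for FixedWindows is an assumption about its default window mapping fn rather than something this test states):

SdkComponents fixedComponents = SdkComponents.create();
fixedComponents.registerEnvironment(Environments.createDockerEnvironment("java"));
RunnerApi.FunctionSpec fixedSpec =
    RunnerApi.FunctionSpec.newBuilder()
        .setUrn(WindowMappingFnRunner.URN)
        .setPayload(
            ParDoTranslation.translateWindowMappingFn(
                    FixedWindows.of(Duration.standardMinutes(5)).getDefaultWindowMappingFn(),
                    fixedComponents)
                .toByteString())
        .build();
RunnerApi.PTransform fixedPTransform =
    RunnerApi.PTransform.newBuilder().setSpec(fixedSpec).build();
ThrowingFunction<KV<Object, BoundedWindow>, KV<Object, BoundedWindow>> fixedMapFn =
    WindowMappingFnRunner.createMapFunctionForPTransform("fixedPTransformId", fixedPTransform);
KV<Object, BoundedWindow> mapped =
    fixedMapFn.apply(KV.of("abc", new IntervalWindow(Instant.now(), Duration.standardMinutes(1))));
// mapped.getValue() should be the five-minute fixed window containing the
// input window's maximum timestamp (assumed behavior of FixedWindows'
// default window mapping fn).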