Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in the Apache Beam project.
From the class DataflowPipelineTranslatorTest, method testStreamingSplittableParDoTranslation.
/**
 * Smoke test that fails fast when streaming translation of a splittable ParDo breaks.
 *
 * <p>Translates a one-minute fixed-windowed pipeline containing a {@code TestSplittableFn} and
 * checks that exactly one "SplittableProcessKeyed" step is produced, and that the step carries
 * the expected DoFn, window fn, input coder and restriction coder.
 */
@Test
public void testStreamingSplittableParDoTranslation() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  // The runner is created before streaming is switched on; this order is kept deliberately.
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(true);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  PCollection<String> windowed =
      pipeline
          .apply(Create.of("a"))
          .apply(Window.into(FixedWindows.of(Duration.standardMinutes(1))));
  windowed.apply(ParDo.of(new TestSplittableFn()));

  runner.replaceV1Transforms(pipeline);

  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  Job job =
      translator
          .translate(pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList())
          .getJob();

  // The job should contain a SplittableParDo.ProcessKeyedElements step, translated as
  // "SplittableProcessKeyed" -- and it should contain it exactly once.
  Step splittableStep = null;
  for (Step candidate : job.getSteps()) {
    if (!"SplittableProcessKeyed".equals(candidate.getKind())) {
      continue;
    }
    // A previously recorded match means the step kind appeared more than once.
    assertNull(splittableStep);
    splittableStep = candidate;
  }
  assertNotNull(splittableStep);

  // Recover the serialized DoFnInfo stored in the step properties.
  byte[] serializedFn =
      jsonStringToByteArray(
          getString(splittableStep.getProperties(), PropertyNames.SERIALIZED_FN));
  @SuppressWarnings({"unchecked", "rawtypes"})
  DoFnInfo<String, Integer> fnInfo =
      (DoFnInfo<String, Integer>)
          SerializableUtils.deserializeFromByteArray(serializedFn, "DoFnInfo");

  assertThat(fnInfo.getDoFn(), instanceOf(TestSplittableFn.class));
  assertThat(
      fnInfo.getWindowingStrategy().getWindowFn(),
      Matchers.<WindowFn>equalTo(FixedWindows.of(Duration.standardMinutes(1))));
  assertThat(fnInfo.getInputCoder(), instanceOf(StringUtf8Coder.class));

  // The restriction coder is stored as a cloud object and must round-trip to the expected coder.
  CloudObject restrictionCoderSpec =
      (CloudObject)
          Structs.getObject(splittableStep.getProperties(), PropertyNames.RESTRICTION_CODER);
  Coder<?> restrictionCoder = CloudObjects.coderFromCloudObject(restrictionCoderSpec);
  assertEquals(
      KvCoder.of(SerializableCoder.of(OffsetRange.class), VoidCoder.of()), restrictionCoder);
}
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in the Apache Beam project.
From the class DataflowPipelineTranslatorTest, method runStreamingGroupIntoBatchesAndGetJobSpec.
/**
 * Translates a streaming pipeline that applies {@code GroupIntoBatches} with batch size 3 to
 * three key/value elements, and returns the resulting job specification.
 *
 * @param withShardedKey when true, the transform is applied via {@code withShardedKey()};
 *     otherwise the plain transform is applied
 * @param experiments experiments to set on the pipeline options before translation
 * @return the job specification produced by the translator
 * @throws IOException declared by this helper; not thrown directly by the visible code
 */
private JobSpecification runStreamingGroupIntoBatchesAndGetJobSpec(
    Boolean withShardedKey, List<String> experiments) throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(experiments);
  options.setStreaming(true);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  List<KV<Integer, String>> elements = Arrays.asList(KV.of(1, "1"), KV.of(2, "2"), KV.of(3, "3"));
  PCollection<KV<Integer, String>> keyedInput = pipeline.apply(Create.of(elements));

  GroupIntoBatches<Integer, String> batcher = GroupIntoBatches.ofSize(3);
  if (withShardedKey) {
    keyedInput.apply(batcher.withShardedKey());
  } else {
    keyedInput.apply(batcher);
  }

  // The runner is created only after the pipeline graph has been built.
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceV1Transforms(pipeline);

  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  return translator.translate(
      pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList());
}
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in the Apache Beam project.
From the class DataflowPipelineTranslatorTest, method runBatchGroupIntoBatchesAndGetJobSpec.
/**
 * Translates a batch (non-streaming) pipeline that applies {@code GroupIntoBatches} with batch
 * size 3 to three key/value elements, and returns the resulting job specification.
 *
 * @param withShardedKey when true, the transform is applied via {@code withShardedKey()};
 *     otherwise the plain transform is applied
 * @param experiments experiments to set on the pipeline options before translation
 * @return the job specification produced by the translator
 * @throws IOException declared by this helper; not thrown directly by the visible code
 */
private JobSpecification runBatchGroupIntoBatchesAndGetJobSpec(
    Boolean withShardedKey, List<String> experiments) throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setExperiments(experiments);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  List<KV<Integer, String>> elements = Arrays.asList(KV.of(1, "1"), KV.of(2, "2"), KV.of(3, "3"));
  PCollection<KV<Integer, String>> keyedInput = pipeline.apply(Create.of(elements));

  GroupIntoBatches<Integer, String> batcher = GroupIntoBatches.ofSize(3);
  if (withShardedKey) {
    keyedInput.apply(batcher.withShardedKey());
  } else {
    keyedInput.apply(batcher);
  }

  // The runner is created only after the pipeline graph has been built.
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceV1Transforms(pipeline);

  SdkComponents sdkComponents = createSdkComponents(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, sdkComponents, true);
  return translator.translate(
      pipeline, pipelineProto, sdkComponents, runner, Collections.emptyList());
}
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in the Apache Beam project.
From the class DataflowPipelineTranslatorTest, method testResourceHintsTranslationsResolvesHintsOnOptionsAndComposites.
/**
 * Checks the resource hints recorded on the environments of two leaf transforms after
 * translating the pipeline to its proto form.
 *
 * <p>The transform labelled "Leaf" is expected to carry the hints set through pipeline options.
 * The transform labelled "Innermost" (presumably nested inside {@code Outer}; its definition is
 * not visible here) is expected to carry min RAM "20" and accelerator
 * "set_in_inner_transform".
 */
@Test
public void testResourceHintsTranslationsResolvesHintsOnOptionsAndComposites() {
  ResourceHintsOptions options = PipelineOptionsFactory.as(ResourceHintsOptions.class);
  options.setResourceHints(Arrays.asList("accelerator=set_via_options", "minRam=1B"));

  Pipeline pipeline = Pipeline.create(options);
  PCollection<byte[]> root = pipeline.apply(Impulse.create());
  ResourceHints outerHints =
      ResourceHints.create().withAccelerator("set_on_outer_transform").withMinRam(20);
  root.apply(new Outer().setResourceHints(outerHints));
  root.apply("Leaf", ParDo.of(new IdentityDoFn<byte[]>()));

  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline, false);

  // Hints on the top-level leaf come from the pipeline options.
  String leafEnvironmentId = getLeafTransform(pipelineProto, "Leaf").getEnvironmentId();
  assertThat(
      pipelineProto
          .getComponents()
          .getEnvironmentsMap()
          .get(leafEnvironmentId)
          .getResourceHintsMap(),
      org.hamcrest.Matchers.allOf(
          org.hamcrest.Matchers.hasEntry(
              "beam:resources:min_ram_bytes:v1", ByteString.copyFromUtf8("1")),
          org.hamcrest.Matchers.hasEntry(
              "beam:resources:accelerator:v1", ByteString.copyFromUtf8("set_via_options"))));

  // Hints on the "Innermost" transform.
  String innermostEnvironmentId =
      getLeafTransform(pipelineProto, "Innermost").getEnvironmentId();
  assertThat(
      pipelineProto
          .getComponents()
          .getEnvironmentsMap()
          .get(innermostEnvironmentId)
          .getResourceHintsMap(),
      org.hamcrest.Matchers.allOf(
          org.hamcrest.Matchers.hasEntry(
              "beam:resources:min_ram_bytes:v1", ByteString.copyFromUtf8("20")),
          org.hamcrest.Matchers.hasEntry(
              "beam:resources:accelerator:v1",
              ByteString.copyFromUtf8("set_in_inner_transform"))));
}
Use of org.apache.beam.model.pipeline.v1.RunnerApi.PCollection in the Apache Beam project.
From the class GreedyStageFuser, method anyInputsSideInputs.
/**
 * Reports whether any input PCollection of {@code consumer} has at least one singleton consumer
 * in {@code pipeline}, as returned by {@code QueryablePipeline.getSingletonConsumers}.
 *
 * <p>NOTE(review): judging by the method name, singleton consumers are presumably side-input
 * consumers -- confirm against {@code QueryablePipeline}.
 *
 * @param consumer the transform node whose inputs are inspected
 * @param pipeline the pipeline used to look up PCollections and their singleton consumers
 * @return true as soon as one input with a non-empty singleton-consumer set is found; false if
 *     none is found, including when the transform has no inputs
 */
private static boolean anyInputsSideInputs(PTransformNode consumer, QueryablePipeline pipeline) {
  return consumer.getTransform().getInputsMap().values().stream()
      .anyMatch(
          pCollectionId -> {
            PCollectionNode inputNode =
                PipelineNode.pCollection(
                    pCollectionId,
                    pipeline.getComponents().getPcollectionsMap().get(pCollectionId));
            return !pipeline.getSingletonConsumers(inputNode).isEmpty();
          });
}
Aggregations