use of org.apache.beam.sdk.Pipeline in project beam by apache.
the class BatchStatefulParDoOverridesTest method testMultiOutputOverrideNonCrashing.
/**
 * Applies a multi-output {@code ParDo} around a {@code DummyStatefulDoFn}, lets the Dataflow
 * runner replace the pipeline's transforms, and checks that {@code findBatchStatefulDoFn} still
 * reports the very same {@code DoFn} for the rewritten pipeline.
 */
@Test
public void testMultiOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions dataflowOptions = buildPipelineOptions();
  dataflowOptions.setRunner(DataflowRunner.class);
  Pipeline p = Pipeline.create(dataflowOptions);

  // Only a main output tag is declared; the list of additional output tags is empty.
  TupleTag<Integer> mainTag = new TupleTag<Integer>() {
  };
  DummyStatefulDoFn statefulFn = new DummyStatefulDoFn();
  p.apply(Create.of(KV.of(1, 2)))
      .apply(ParDo.of(statefulFn).withOutputTags(mainTag, TupleTagList.empty()));

  // Transform replacement is the step under test; it must complete without throwing.
  DataflowRunner.fromOptions(dataflowOptions).replaceTransforms(p);

  assertThat(findBatchStatefulDoFn(p), equalTo((DoFn) statefulFn));
}
use of org.apache.beam.sdk.Pipeline in project beam by apache.
the class DataflowPipelineTranslatorTest method testTaggedNamesOverridden.
/**
 * Verifies that translation overrides the names of the output collections of a multi-output
 * ParDo. This is a special case because users may name both the tags and the collections; the
 * translated job must instead use the {@code <step name>.out<index>} form asserted below.
 */
@Test
public void testTaggedNamesOverridden() throws Exception {
  DataflowPipelineOptions dataflowOptions = buildPipelineOptions();
  // The runner is created before streaming is switched off, exactly as this test has always done.
  DataflowRunner dataflowRunner = DataflowRunner.fromOptions(dataflowOptions);
  dataflowOptions.setStreaming(false);
  DataflowPipelineTranslator pipelineTranslator =
      DataflowPipelineTranslator.fromOptions(dataflowOptions);
  Pipeline p = Pipeline.create(dataflowOptions);

  // Two explicitly named tags and one tag without an explicit id.
  TupleTag<Integer> frazzleTag = new TupleTag<Integer>("frazzle") {
  };
  TupleTag<Integer> bazzleTag = new TupleTag<Integer>("bazzle") {
  };
  TupleTag<Integer> unnamedTag = new TupleTag<Integer>() {
  };

  PCollectionTuple tagged =
      p.apply(Create.of(3))
          .apply(
              ParDo.of(
                      new DoFn<Integer, Integer>() {
                        @ProcessElement
                        public void drop() {
                        }
                      })
                  .withOutputTags(frazzleTag, TupleTagList.of(bazzleTag).and(unnamedTag)));

  // User-chosen collection names that the translated job is expected to disregard.
  tagged.get(frazzleTag).setName("bizbazzle");
  tagged.get(bazzleTag).setName("gonzaggle");
  tagged.get(unnamedTag).setName("froonazzle");

  dataflowRunner.replaceTransforms(p);

  // Index 1 is the ParDo step.
  Step parDoStep =
      pipelineTranslator
          .translate(p, dataflowRunner, Collections.<DataflowPackage>emptyList())
          .getJob()
          .getSteps()
          .get(1);
  String parDoName = Structs.getString(parDoStep.getProperties(), PropertyNames.USER_NAME);
  List<Map<String, Object>> infos =
      Structs.getListOfMaps(parDoStep.getProperties(), PropertyNames.OUTPUT_INFO, null);

  assertThat(infos.size(), equalTo(3));

  // The names set by the user _and_ the tags _must_ be ignored, or metrics will not show up.
  int position = 0;
  for (Map<String, Object> info : infos) {
    assertThat(
        Structs.getString(info, PropertyNames.USER_NAME),
        equalTo(String.format("%s.out%s", parDoName, position)));
    position++;
  }
}
use of org.apache.beam.sdk.Pipeline in project beam by apache.
the class DataflowPipelineTranslatorTest method testMultiGraphPipelineSerialization.
/**
 * Builds a pipeline in which two composite transforms are applied to the same input
 * collection, translates it, and checks the resulting job with
 * {@code assertAllStepOutputsHaveUniqueIds}.
 */
@Test
public void testMultiGraphPipelineSerialization() throws Exception {
  DataflowPipelineOptions pipelineOptions = buildPipelineOptions();
  Pipeline pipeline = Pipeline.create(pipelineOptions);

  // One source collection feeding two separate branches.
  PCollection<Integer> numbers = pipeline.begin().apply(Create.of(1, 2, 3));
  numbers.apply(new UnrelatedOutputCreator());
  numbers.apply(new UnboundOutputCreator());

  // Note: the translator is built from freshly created default options, not pipelineOptions.
  DataflowPipelineTranslator translator =
      DataflowPipelineTranslator.fromOptions(
          PipelineOptionsFactory.as(DataflowPipelineOptions.class));
  DataflowRunner runner = DataflowRunner.fromOptions(pipelineOptions);

  // Check that translation doesn't fail.
  JobSpecification spec =
      translator.translate(pipeline, runner, Collections.<DataflowPackage>emptyList());

  assertAllStepOutputsHaveUniqueIds(spec.getJob());
}
use of org.apache.beam.sdk.Pipeline in project beam by apache.
the class BatchStatefulParDoOverridesTest method testSingleOutputOverrideNonCrashing.
/**
 * Applies a single-output {@code ParDo} around a {@code DummyStatefulDoFn}, lets the Dataflow
 * runner replace the pipeline's transforms, and checks that {@code findBatchStatefulDoFn} still
 * reports the very same {@code DoFn} for the rewritten pipeline.
 */
@Test
public void testSingleOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions dataflowOptions = buildPipelineOptions();
  dataflowOptions.setRunner(DataflowRunner.class);
  Pipeline p = Pipeline.create(dataflowOptions);

  DummyStatefulDoFn statefulFn = new DummyStatefulDoFn();
  p.apply(Create.of(KV.of(1, 2)))
      .apply(ParDo.of(statefulFn));

  // Transform replacement is the step under test; it must complete without throwing.
  DataflowRunner.fromOptions(dataflowOptions).replaceTransforms(p);

  assertThat(findBatchStatefulDoFn(p), equalTo((DoFn) statefulFn));
}
use of org.apache.beam.sdk.Pipeline in project beam by apache.
the class DisplayDataEvaluator method displayDataForPrimitiveTransforms.
/**
 * Applies the given {@link PTransform} to an empty input in a fresh {@link Pipeline} and
 * gathers the {@link DisplayData} registered on the primitive {@link PTransform PTransforms}
 * inside it.
 *
 * @param root the root {@link PTransform} to traverse
 * @param inputCoder the coder to use for the {@link PTransform} input; when null, a
 *     {@code VoidCoder}-based empty input is used instead
 *
 * @return the set of {@link DisplayData} for primitive {@link PTransform PTransforms}.
 */
public <InputT> Set<DisplayData> displayDataForPrimitiveTransforms(final PTransform<? super PCollection<InputT>, ? extends POutput> root, Coder<InputT> inputCoder) {
  Create.Values<InputT> emptySource;
  if (inputCoder == null) {
    // These types don't actually work, but the pipeline will never be run
    emptySource = (Create.Values<InputT>) Create.empty(VoidCoder.of());
  } else {
    emptySource = Create.empty(inputCoder);
  }

  Pipeline p = Pipeline.create(options);
  PCollection<InputT> emptyInput = p.apply("Input", emptySource);
  emptyInput.apply("Transform", root);

  return displayDataForPipeline(p, root);
}
Aggregations