Search in sources :

Example 1 with Defaults

use of org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults in project beam by apache.

the class TransformHierarchyTest method visitAfterReplace.

/**
 * Tests that visiting the {@link TransformHierarchy} after replacing nodes does not visit any of
 * the original nodes or inaccessible values but does visit all of the replacement nodes, new
 * inaccessible replacement values, and the original output values.
 *
 * <p>The test registers an upstream node and an "Original" ParDo node, replaces the ParDo node
 * with a composite containing a multi-output ParDo, and then checks which nodes and values a
 * visitor is handed.
 */
@Test
public void visitAfterReplace() {
    // Captured before any node is pushed; asserted below to be among the visited composites.
    Node root = hierarchy.getCurrent();
    final SingleOutput<Long, Long> originalParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    });
    // Apply the transforms to the pipeline to obtain the PCollections that are handed to the
    // hierarchy as node inputs and outputs below.
    GenerateSequence genUpstream = GenerateSequence.from(0);
    PCollection<Long> upstream = pipeline.apply(genUpstream);
    PCollection<Long> output = upstream.apply("Original", originalParDo);
    // Register the upstream node: push, finish its input, set its output, pop.
    Node upstreamNode = hierarchy.pushNode("Upstream", pipeline.begin(), genUpstream);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(upstream);
    hierarchy.popNode();
    // Register the node that is replaced further down, following the same sequence.
    Node original = hierarchy.pushNode("Original", upstream, originalParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(output);
    hierarchy.popNode();
    // Replacement pieces: a multi-output ParDo tagged with `longs`, wrapped in a composite whose
    // expand() applies that ParDo and returns the `longs` output.
    final TupleTag<Long> longs = new TupleTag<>();
    final MultiOutput<Long, Long> replacementParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    }).withOutputTags(longs, TupleTagList.empty());
    PTransform<PCollection<Long>, PCollection<Long>> replacementComposite = new PTransform<PCollection<Long>, PCollection<Long>>() {

        @Override
        public PCollection<Long> expand(PCollection<Long> input) {
            return input.apply("Contained", replacementParDo).get(longs);
        }
    };
    PCollectionTuple replacementOutput = upstream.apply("Contained", replacementParDo);
    // Swap the original node for the composite, then register the contained ParDo beneath it.
    // NOTE(review): the final popNode() of this section presumably closes the composite node
    // opened by replaceNode -- confirm against TransformHierarchy.
    Node compositeNode = hierarchy.replaceNode(original, upstream, replacementComposite);
    Node replacementParNode = hierarchy.pushNode("Original/Contained", upstream, replacementParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(replacementOutput);
    hierarchy.popNode();
    hierarchy.setOutput(replacementOutput.get(longs));
    // The tuple is expected to expand to exactly one tagged PCollection (getOnlyElement).
    Map<TupleTag<?>, PCollection<?>> expandedReplacementOutput = (Map) replacementOutput.expand();
    Entry<TupleTag<?>, PCollection<?>> replacementLongs = Iterables.getOnlyElement(expandedReplacementOutput.entrySet());
    // Pair the replacement's `longs` output with the original `output` value; the assertion on
    // visitedValues below expects `output`, not the replacement value, to be visited.
    hierarchy.replaceOutputs(Collections.singletonMap(replacementOutput.get(longs), ReplacementOutput.of(TaggedPValue.ofExpandedValue(output), TaggedPValue.of(replacementLongs.getKey(), replacementLongs.getValue()))));
    hierarchy.popNode();
    // Record every composite and primitive node handed to the visitor; visit() returns the
    // values it visited.
    final Set<Node> visitedCompositeNodes = new HashSet<>();
    final Set<Node> visitedPrimitiveNodes = new HashSet<>();
    Set<PValue> visitedValues = hierarchy.visit(new Defaults() {

        @Override
        public CompositeBehavior enterCompositeTransform(Node node) {
            visitedCompositeNodes.add(node);
            return CompositeBehavior.ENTER_TRANSFORM;
        }

        @Override
        public void visitPrimitiveTransform(Node node) {
            visitedPrimitiveNodes.add(node);
        }
    });
    /*
    Final Graph:
    Upstream -> Upstream.out -> Composite -> (ReplacementParDo -> OriginalParDo.out)
    */
    // The replaced `original` node must not appear in either node set.
    assertThat(visitedCompositeNodes, containsInAnyOrder(root, compositeNode));
    assertThat(visitedPrimitiveNodes, containsInAnyOrder(upstreamNode, replacementParNode));
    assertThat(visitedValues, containsInAnyOrder(upstream, output));
}
Also used : Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) TupleTag(org.apache.beam.sdk.values.TupleTag) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PTransform(org.apache.beam.sdk.transforms.PTransform) HashSet(java.util.HashSet) TaggedPValue(org.apache.beam.sdk.values.TaggedPValue) PValue(org.apache.beam.sdk.values.PValue) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) PCollection(org.apache.beam.sdk.values.PCollection) DoFn(org.apache.beam.sdk.transforms.DoFn) Defaults(org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults) Map(java.util.Map) Test(org.junit.Test)

Example 2 with Defaults

use of org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults in project beam by apache.

the class SplittableParDoTest method testConvertToPrimitiveReadsHappen.

/**
 * Checks that, with the {@code use_deprecated_read} experiment set, converting a pipeline that
 * holds one bounded and one unbounded read produces a primitive read transform for each, and
 * that no composite entered during traversal is a {@code Read.Bounded} or {@code Read.Unbounded}.
 */
@Test
public void testConvertToPrimitiveReadsHappen() {
    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(CrashingRunner.class);
    ExperimentalOptions experiments = options.as(ExperimentalOptions.class);
    ExperimentalOptions.addExperiment(experiments, "use_deprecated_read");

    // One bounded read and one unbounded read (a bounded source behind the adapter).
    Pipeline readPipeline = Pipeline.create(options);
    readPipeline.apply(Read.from(new FakeBoundedSource()));
    readPipeline.apply(Read.from(new BoundedToUnboundedSourceAdapter<>(new FakeBoundedSource())));

    SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary(readPipeline);

    // Flags flipped by the visitor when the corresponding primitive read is encountered.
    final AtomicBoolean boundedSeen = new AtomicBoolean(false);
    final AtomicBoolean unboundedSeen = new AtomicBoolean(false);

    Defaults readChecker = new Defaults() {

        @Override
        public CompositeBehavior enterCompositeTransform(Node node) {
            // Neither flavour of the composite Read may be entered after conversion.
            assertThat(node.getTransform(), not(instanceOf(Read.Bounded.class)));
            assertThat(node.getTransform(), not(instanceOf(Read.Unbounded.class)));
            return super.enterCompositeTransform(node);
        }

        @Override
        public void visitPrimitiveTransform(Node node) {
            boolean isBoundedRead = node.getTransform() instanceof SplittableParDo.PrimitiveBoundedRead;
            if (isBoundedRead) {
                boundedSeen.set(true);
                return;
            }
            if (node.getTransform() instanceof SplittableParDo.PrimitiveUnboundedRead) {
                unboundedSeen.set(true);
            }
        }
    };
    readPipeline.traverseTopologically(readChecker);

    assertTrue(boundedSeen.get());
    assertTrue(unboundedSeen.get());
}
Also used : ExperimentalOptions(org.apache.beam.sdk.options.ExperimentalOptions) Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Defaults(org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults) BoundedToUnboundedSourceAdapter(org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) Test(org.junit.Test)

Example 3 with Defaults

use of org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults in project beam by apache.

the class SplittableParDoTest method testConvertIsSkippedWhenUsingDeprecatedRead.

/**
 * Checks that {@code SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary}
 * leaves no {@code PrimitiveBoundedRead} or {@code PrimitiveUnboundedRead} in a pipeline built
 * with {@code Pipeline.create()} that has one bounded and one unbounded read applied.
 *
 * <p>NOTE(review): despite the method name, no experiment is configured on the pipeline in this
 * method; confirm which option combination the test is meant to cover.
 */
@Test
public void testConvertIsSkippedWhenUsingDeprecatedRead() {
    Pipeline sdfRead = Pipeline.create();
    sdfRead.apply(Read.from(new FakeBoundedSource()));
    sdfRead.apply(Read.from(new BoundedToUnboundedSourceAdapter<>(new FakeBoundedSource())));
    SplittableParDo.convertReadBasedSplittableDoFnsToPrimitiveReadsIfNecessary(sdfRead);
    // Traverse the pipeline that was actually built and converted above. Walking any other
    // pipeline would never encounter these reads, making the assertions pass vacuously.
    sdfRead.traverseTopologically(new Defaults() {

        @Override
        public void visitPrimitiveTransform(Node node) {
            assertThat(node.getTransform(), not(instanceOf(SplittableParDo.PrimitiveBoundedRead.class)));
            assertThat(node.getTransform(), not(instanceOf(SplittableParDo.PrimitiveUnboundedRead.class)));
        }
    });
}
Also used : Defaults(org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults) BoundedToUnboundedSourceAdapter(org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter) Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)

Example 4 with Defaults

use of org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults in project beam by apache.

the class DeduplicatedFlattenFactoryTest method duplicatesInsertsMultipliers.

/**
 * Applies {@code FlattenWithoutDuplicateInputs} to an input list that repeats {@code first}
 * three times and asserts that every primitive {@code Flatten.PCollections} found in the pipeline
 * has inputs that differ from the expansion of that original list.
 */
@Test
public void duplicatesInsertsMultipliers() {
    PTransform<PCollectionList<String>, PCollection<String>> dedupingFlatten = new DeduplicatedFlattenFactory.FlattenWithoutDuplicateInputs<>();
    // `first` appears three times, so the list handed to the transform contains duplicates.
    final PCollectionList<String> duplicatedInputs = PCollectionList.of(first).and(second).and(first).and(first);
    duplicatedInputs.apply(dedupingFlatten);

    Defaults flattenInputChecker = new Defaults() {

        @Override
        public void visitPrimitiveTransform(TransformHierarchy.Node node) {
            // Only primitive flattens are of interest; ignore everything else.
            if (!(node.getTransform() instanceof Flatten.PCollections)) {
                return;
            }
            assertThat(node.getInputs(), not(equalTo(duplicatedInputs.expand())));
        }
    };
    pipeline.traverseTopologically(flattenInputChecker);
}
Also used : PCollectionList(org.apache.beam.sdk.values.PCollectionList) PCollection(org.apache.beam.sdk.values.PCollection) Defaults(org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults) TransformHierarchy(org.apache.beam.sdk.runners.TransformHierarchy) Flatten(org.apache.beam.sdk.transforms.Flatten) FlattenWithoutDuplicateInputs(org.apache.beam.runners.core.construction.DeduplicatedFlattenFactory.FlattenWithoutDuplicateInputs) Test(org.junit.Test)

Aggregations

Defaults (org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults)4 Test (org.junit.Test)4 Node (org.apache.beam.sdk.runners.TransformHierarchy.Node)3 BoundedToUnboundedSourceAdapter (org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource.BoundedToUnboundedSourceAdapter)2 Pipeline (org.apache.beam.sdk.Pipeline)2 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)2 PCollection (org.apache.beam.sdk.values.PCollection)2 HashSet (java.util.HashSet)1 Map (java.util.Map)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 FlattenWithoutDuplicateInputs (org.apache.beam.runners.core.construction.DeduplicatedFlattenFactory.FlattenWithoutDuplicateInputs)1 GenerateSequence (org.apache.beam.sdk.io.GenerateSequence)1 ExperimentalOptions (org.apache.beam.sdk.options.ExperimentalOptions)1 PipelineOptions (org.apache.beam.sdk.options.PipelineOptions)1 TransformHierarchy (org.apache.beam.sdk.runners.TransformHierarchy)1 DoFn (org.apache.beam.sdk.transforms.DoFn)1 Flatten (org.apache.beam.sdk.transforms.Flatten)1 PTransform (org.apache.beam.sdk.transforms.PTransform)1 PCollectionList (org.apache.beam.sdk.values.PCollectionList)1 PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)1