Search in sources :

Example 1 with Node

use of org.apache.beam.sdk.runners.TransformHierarchy.Node in project beam by apache.

the class SdkComponentsTest method translatePipeline.

/**
 * Checks that the proto produced by {@code SdkComponents.translatePipeline} contains as many
 * transforms, PCollections, coders and windowing strategies as an independent walk of the
 * pipeline finds.
 *
 * <p>The pipeline under test uses a custom coder and a non-default windowing strategy. The
 * expected components are gathered by a {@link PipelineVisitor} and the counts are compared when
 * the traversal leaves the root node.
 */
@Test
public void translatePipeline() {
    BigEndianLongCoder customCoder = BigEndianLongCoder.of();
    PCollection<Long> elems = pipeline.apply(GenerateSequence.from(0L).to(207L));
    PCollection<Long> counted = elems.apply(Count.<Long>globally()).setCoder(customCoder);
    PCollection<Long> windowed = counted.apply(
        Window.<Long>into(FixedWindows.of(Duration.standardMinutes(7)))
            .triggering(AfterWatermark.pastEndOfWindow().withEarlyFirings(AfterPane.elementCountAtLeast(19)))
            .accumulatingFiredPanes()
            .withAllowedLateness(Duration.standardMinutes(3L)));
    PCollection<KV<String, Long>> keyed = windowed.apply(WithKeys.<String, Long>of("foo"));
    // The grouped output is not inspected directly; the transform is applied only so that it
    // becomes part of the pipeline being translated.
    keyed.apply(GroupByKey.<String, Long>create());
    final RunnerApi.Pipeline pipelineProto = SdkComponents.translatePipeline(pipeline);
    pipeline.traverseTopologically(new PipelineVisitor.Defaults() {

        private final Set<Node> transforms = new HashSet<>();

        private final Set<PCollection<?>> pcollections = new HashSet<>();

        // Coders are wrapped with identity equivalence, so two coders count as the same
        // component only when they are the same instance.
        private final Set<Equivalence.Wrapper<? extends Coder<?>>> coders = new HashSet<>();

        private final Set<WindowingStrategy<?, ?>> windowingStrategies = new HashSet<>();

        @Override
        public void leaveCompositeTransform(Node node) {
            if (node.isRootNode()) {
                // Compare the proto's component counts with what this visitor has collected
                // by the time the root node is left.
                assertThat("Unexpected number of PTransforms", pipelineProto.getComponents().getTransformsCount(), equalTo(transforms.size()));
                assertThat("Unexpected number of PCollections", pipelineProto.getComponents().getPcollectionsCount(), equalTo(pcollections.size()));
                assertThat("Unexpected number of Coders", pipelineProto.getComponents().getCodersCount(), equalTo(coders.size()));
                assertThat("Unexpected number of Windowing Strategies", pipelineProto.getComponents().getWindowingStrategiesCount(), equalTo(windowingStrategies.size()));
            } else {
                transforms.add(node);
            }
        }

        @Override
        public void visitPrimitiveTransform(Node node) {
            transforms.add(node);
        }

        @Override
        public void visitValue(PValue value, Node producer) {
            if (value instanceof PCollection) {
                // Wildcard instead of a raw type keeps the calls below generically checked.
                PCollection<?> pc = (PCollection<?>) value;
                pcollections.add(pc);
                addCoders(pc.getCoder());
                WindowingStrategy<?, ?> strategy = pc.getWindowingStrategy();
                windowingStrategies.add(strategy);
                addCoders(strategy.getWindowFn().windowCoder());
            }
        }

        /** Records {@code coder} and, for structured coders, every component coder recursively. */
        private void addCoders(Coder<?> coder) {
            coders.add(Equivalence.<Coder<?>>identity().wrap(coder));
            if (coder instanceof StructuredCoder) {
                for (Coder<?> component : ((StructuredCoder<?>) coder).getComponents()) {
                    addCoders(component);
                }
            }
        }
    });
}
Also used : Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) WindowingStrategy(org.apache.beam.sdk.values.WindowingStrategy) RunnerApi(org.apache.beam.sdk.common.runner.v1.RunnerApi) PipelineVisitor(org.apache.beam.sdk.Pipeline.PipelineVisitor) BigEndianLongCoder(org.apache.beam.sdk.coders.BigEndianLongCoder) HashSet(java.util.HashSet) Coder(org.apache.beam.sdk.coders.Coder) SetCoder(org.apache.beam.sdk.coders.SetCoder) StringUtf8Coder(org.apache.beam.sdk.coders.StringUtf8Coder) KvCoder(org.apache.beam.sdk.coders.KvCoder) BigEndianLongCoder(org.apache.beam.sdk.coders.BigEndianLongCoder) IterableCoder(org.apache.beam.sdk.coders.IterableCoder) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) StructuredCoder(org.apache.beam.sdk.coders.StructuredCoder) ByteArrayCoder(org.apache.beam.sdk.coders.ByteArrayCoder) KV(org.apache.beam.sdk.values.KV) PValue(org.apache.beam.sdk.values.PValue) PCollection(org.apache.beam.sdk.values.PCollection) StructuredCoder(org.apache.beam.sdk.coders.StructuredCoder) Test(org.junit.Test)

Example 2 with Node

use of org.apache.beam.sdk.runners.TransformHierarchy.Node in project beam by apache.

the class PipelineTest method testReplaceAll.

/**
 * Applies two overrides through {@code replaceAll} (one matching {@link GenerateSequence}, one
 * matching {@link Create.Values}) and asserts that no non-root node passed to
 * {@code enterCompositeTransform} afterwards has either class as its transform class.
 */
@Test
public void testReplaceAll() {
    pipeline.enableAbandonedNodeEnforcement(false);
    pipeline.apply("unbounded", GenerateSequence.from(0));
    pipeline.apply("bounded", GenerateSequence.from(0).to(100));

    // First override: anything whose transform is a GenerateSequence.
    PTransformOverride sequenceOverride = PTransformOverride.of(new PTransformMatcher() {

        @Override
        public boolean matches(AppliedPTransform<?, ?, ?> applied) {
            return applied.getTransform() instanceof GenerateSequence;
        }
    }, new GenerateSequenceToCreateOverride());

    // Second override: anything whose transform is a Create.Values.
    PTransformOverride createOverride = PTransformOverride.of(new PTransformMatcher() {

        @Override
        public boolean matches(AppliedPTransform<?, ?, ?> applied) {
            return applied.getTransform() instanceof Create.Values;
        }
    }, new CreateValuesToEmptyFlattenOverride());

    pipeline.replaceAll(ImmutableList.of(sequenceOverride, createOverride));

    pipeline.traverseTopologically(new PipelineVisitor.Defaults() {

        @Override
        public CompositeBehavior enterCompositeTransform(Node visited) {
            // The root node is exempt from the check.
            if (visited.isRootNode()) {
                return CompositeBehavior.ENTER_TRANSFORM;
            }
            assertThat(
                visited.getTransform().getClass(),
                not(anyOf(
                    Matchers.<Class<? extends PTransform>>equalTo(GenerateSequence.class),
                    Matchers.<Class<? extends PTransform>>equalTo(Create.Values.class))));
            return CompositeBehavior.ENTER_TRANSFORM;
        }
    });
}
Also used : PTransformMatcher(org.apache.beam.sdk.runners.PTransformMatcher) Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) Create(org.apache.beam.sdk.transforms.Create) PipelineVisitor(org.apache.beam.sdk.Pipeline.PipelineVisitor) PTransformOverride(org.apache.beam.sdk.runners.PTransformOverride) PTransform(org.apache.beam.sdk.transforms.PTransform) AppliedPTransform(org.apache.beam.sdk.runners.AppliedPTransform) Test(org.junit.Test)

Example 3 with Node

use of org.apache.beam.sdk.runners.TransformHierarchy.Node in project beam by apache.

the class TransformHierarchyTest method replaceSucceeds.

/**
 * Pushes an enclosing node containing a {@code Create} node, replaces the {@code Create} node
 * with a {@code Read}, and checks that the replacement takes over the original's inputs,
 * enclosing node and output value.
 */
@Test
public void replaceSucceeds() {
    PTransform<?, ?> outerTransform = new PTransform<PInput, POutput>() {

        @Override
        public POutput expand(PInput in) {
            return PDone.in(in.getPipeline());
        }
    };
    Node outerNode = hierarchy.pushNode("Enclosing", PBegin.in(pipeline), outerTransform);

    // Push, specify and pop the node that will later be replaced.
    Create.Values<Long> createOne = Create.of(1L);
    Node createNode = hierarchy.pushNode("Create", PBegin.in(pipeline), createOne);
    assertThat(hierarchy.getCurrent(), equalTo(createNode));
    PCollection<Long> createOutput = pipeline.apply(createOne);
    hierarchy.setOutput(createOutput);
    hierarchy.popNode();
    assertThat(createNode.finishedSpecifying, is(true));

    // Close the enclosing node as well.
    hierarchy.setOutput(PDone.in(pipeline));
    hierarchy.popNode();
    assertThat(hierarchy.getCurrent(), not(equalTo(outerNode)));

    // Swap the Create node for a bounded Read.
    Read.Bounded<Long> readOne = Read.from(CountingSource.upTo(1L));
    PCollection<Long> readOutput = pipeline.apply(readOne);
    Node readNode = hierarchy.replaceNode(createNode, PBegin.in(pipeline), readOne);
    assertThat(hierarchy.getCurrent(), equalTo(readNode));
    hierarchy.setOutput(readOutput);

    TaggedPValue taggedReadOutput = TaggedPValue.ofExpandedValue(readOutput);
    ReplacementOutput mappedOutput =
        ReplacementOutput.of(TaggedPValue.ofExpandedValue(createOutput), taggedReadOutput);
    Map<PValue, ReplacementOutput> outputMapping =
        Collections.<PValue, ReplacementOutput>singletonMap(readOutput, mappedOutput);
    hierarchy.replaceOutputs(outputMapping);

    assertThat(readNode.getInputs(), equalTo(createNode.getInputs()));
    assertThat(readNode.getEnclosingNode(), equalTo(createNode.getEnclosingNode()));
    assertThat(readNode.getEnclosingNode(), equalTo(outerNode));
    assertThat(readNode.getTransform(), Matchers.<PTransform<?, ?>>equalTo(readOne));
    // The replacement keeps its own tag, but the value under that tag is the original output.
    assertThat(readNode.getOutputs().keySet(), Matchers.<TupleTag<?>>contains(taggedReadOutput.getTag()));
    assertThat(readNode.getOutputs().values(), Matchers.<PValue>contains(createOutput));
    hierarchy.popNode();
}
Also used : Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) PInput(org.apache.beam.sdk.values.PInput) Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) TaggedPValue(org.apache.beam.sdk.values.TaggedPValue) PValue(org.apache.beam.sdk.values.PValue) Read(org.apache.beam.sdk.io.Read) ReplacementOutput(org.apache.beam.sdk.runners.PTransformOverrideFactory.ReplacementOutput) Create(org.apache.beam.sdk.transforms.Create) TaggedPValue(org.apache.beam.sdk.values.TaggedPValue) PTransform(org.apache.beam.sdk.transforms.PTransform) Test(org.junit.Test)

Example 4 with Node

use of org.apache.beam.sdk.runners.TransformHierarchy.Node in project beam by apache.

the class TransformHierarchyTest method visitAfterReplace.

/**
   * Tests that visiting the {@link TransformHierarchy} after replacing nodes does not visit any
   * of the original nodes or inaccessible values but does visit all of the replacement nodes,
   * new inaccessible replacement values, and the original output values.
   */
@Test
public void visitAfterReplace() {
    // The node that is current before anything is pushed; asserted below to be visited as a
    // composite alongside the replacement composite.
    Node root = hierarchy.getCurrent();
    final SingleOutput<Long, Long> originalParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    });
    // Apply the transforms to the pipeline to obtain the values, then register matching nodes
    // in the hierarchy under test by hand.
    GenerateSequence genUpstream = GenerateSequence.from(0);
    PCollection<Long> upstream = pipeline.apply(genUpstream);
    PCollection<Long> output = upstream.apply("Original", originalParDo);
    Node upstreamNode = hierarchy.pushNode("Upstream", pipeline.begin(), genUpstream);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(upstream);
    hierarchy.popNode();
    // The node that gets replaced further down.
    Node original = hierarchy.pushNode("Original", upstream, originalParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(output);
    hierarchy.popNode();
    final TupleTag<Long> longs = new TupleTag<>();
    // Multi-output ParDo with the same element + 1 logic as the original, emitting under `longs`.
    final MultiOutput<Long, Long> replacementParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    }).withOutputTags(longs, TupleTagList.empty());
    // Composite replacement: its expansion applies the multi-output ParDo and returns the
    // PCollection stored under `longs`.
    PTransform<PCollection<Long>, PCollection<Long>> replacementComposite = new PTransform<PCollection<Long>, PCollection<Long>>() {

        @Override
        public PCollection<Long> expand(PCollection<Long> input) {
            return input.apply("Contained", replacementParDo).get(longs);
        }
    };
    PCollectionTuple replacementOutput = upstream.apply("Contained", replacementParDo);
    // NOTE(review): the pushNode that follows appears to nest under the composite returned by
    // replaceNode (two popNode calls follow) — confirm replaceNode leaves it as the current node.
    Node compositeNode = hierarchy.replaceNode(original, upstream, replacementComposite);
    Node replacementParNode = hierarchy.pushNode("Original/Contained", upstream, replacementParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(replacementOutput);
    hierarchy.popNode();
    hierarchy.setOutput(replacementOutput.get(longs));
    // The tuple is expected to expand to exactly one (tag, value) entry; getOnlyElement
    // enforces that.
    Entry<TupleTag<?>, PValue> replacementLongs = Iterables.getOnlyElement(replacementOutput.expand().entrySet());
    // Pair the replacement's `longs` value with the original `output` value.
    hierarchy.replaceOutputs(Collections.<PValue, ReplacementOutput>singletonMap(replacementOutput.get(longs), ReplacementOutput.of(TaggedPValue.ofExpandedValue(output), TaggedPValue.of(replacementLongs.getKey(), replacementLongs.getValue()))));
    hierarchy.popNode();
    // Record which nodes the visitor is handed, split by composite vs. primitive callbacks.
    final Set<Node> visitedCompositeNodes = new HashSet<>();
    final Set<Node> visitedPrimitiveNodes = new HashSet<>();
    Set<PValue> visitedValues = hierarchy.visit(new Defaults() {

        @Override
        public CompositeBehavior enterCompositeTransform(Node node) {
            visitedCompositeNodes.add(node);
            return CompositeBehavior.ENTER_TRANSFORM;
        }

        @Override
        public void visitPrimitiveTransform(Node node) {
            visitedPrimitiveNodes.add(node);
        }
    });
    /*
     Final Graph:
     Upstream -> Upstream.out -> Composite -> (ReplacementParDo -> OriginalParDo.out)
     */
    // Neither the `original` node nor the replacement's own output value appears in the
    // expected sets.
    assertThat(visitedCompositeNodes, containsInAnyOrder(root, compositeNode));
    assertThat(visitedPrimitiveNodes, containsInAnyOrder(upstreamNode, replacementParNode));
    assertThat(visitedValues, Matchers.<PValue>containsInAnyOrder(upstream, output));
}
Also used : Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) TupleTag(org.apache.beam.sdk.values.TupleTag) TaggedPValue(org.apache.beam.sdk.values.TaggedPValue) PValue(org.apache.beam.sdk.values.PValue) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) PCollection(org.apache.beam.sdk.values.PCollection) DoFn(org.apache.beam.sdk.transforms.DoFn) Defaults(org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PTransform(org.apache.beam.sdk.transforms.PTransform) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 5 with Node

use of org.apache.beam.sdk.runners.TransformHierarchy.Node in project beam by apache.

the class TransformHierarchyTest method emptyCompositeSucceeds.

/**
 * Pushes a node whose transform returns one of its inputs unchanged and checks that the node is
 * treated as composite while the producer of the value stays the node that first output it.
 */
@Test
public void emptyCompositeSucceeds() {
    PCollection<Long> primitiveOutput =
        PCollection.createPrimitiveOutputInternal(pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED);

    // First node: registered as the one that outputs the value.
    TransformHierarchy.Node createNode = hierarchy.pushNode("Create", PBegin.in(pipeline), Create.of(1));
    hierarchy.setOutput(primitiveOutput);
    hierarchy.popNode();

    // Second node: its expansion hands back element 0 of the input list, so it creates no new
    // value of its own.
    PCollectionList<Long> singletonList = PCollectionList.of(primitiveOutput);
    PTransform<PCollectionList<Long>, PCollection<Long>> passThrough =
        new PTransform<PCollectionList<Long>, PCollection<Long>>() {

            @Override
            public PCollection<Long> expand(PCollectionList<Long> in) {
                return in.get(0);
            }
        };
    TransformHierarchy.Node passThroughNode = hierarchy.pushNode("Extract", singletonList, passThrough);
    hierarchy.setOutput(primitiveOutput);
    hierarchy.popNode();

    assertThat(hierarchy.getProducer(primitiveOutput), equalTo(createNode));
    assertThat(
        "A Transform that produces non-primitive output should be composite",
        passThroughNode.isCompositeNode(),
        is(true));
}
Also used : PCollectionList(org.apache.beam.sdk.values.PCollectionList) PCollection(org.apache.beam.sdk.values.PCollection) Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) Test(org.junit.Test)

Aggregations

Node (org.apache.beam.sdk.runners.TransformHierarchy.Node) 13, Test (org.junit.Test) 12, PipelineVisitor (org.apache.beam.sdk.Pipeline.PipelineVisitor) 7, PCollection (org.apache.beam.sdk.values.PCollection) 5, PValue (org.apache.beam.sdk.values.PValue) 5, HashSet (java.util.HashSet) 4, PTransform (org.apache.beam.sdk.transforms.PTransform) 4, TaggedPValue (org.apache.beam.sdk.values.TaggedPValue) 4, GenerateSequence (org.apache.beam.sdk.io.GenerateSequence) 3, AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform) 3, Create (org.apache.beam.sdk.transforms.Create) 3, AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean) 2, AtomicReference (java.util.concurrent.atomic.AtomicReference) 2, Defaults (org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults) 2, RunnerApi (org.apache.beam.sdk.common.runner.v1.RunnerApi) 2, Read (org.apache.beam.sdk.io.Read) 2, PTransformOverride (org.apache.beam.sdk.runners.PTransformOverride) 2, DoFn (org.apache.beam.sdk.transforms.DoFn) 2, PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple) 2, TupleTag (org.apache.beam.sdk.values.TupleTag) 2