Search in sources :

Example 31 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.

the class TransformHierarchyTest method visitIsTopologicallyOrdered.

/**
 * Builds a transform hierarchy by hand, registering a consumer of {@code one} and {@code two}
 * before the node that outputs them, and then walks the hierarchy with a visitor whose callbacks
 * assert ordering invariants:
 *
 * <ul>
 *   <li>every input of a node has been visited as a value before the node itself is visited;
 *   <li>a node is entered only after its enclosing node and left before its enclosing node is left;
 *   <li>no node or value is visited more than once.
 * </ul>
 */
@Test
public void visitIsTopologicallyOrdered() {
    PCollection<String> one = PCollection.createPrimitiveOutputInternal(pipeline, WindowingStrategy.globalDefault(), IsBounded.BOUNDED, StringUtf8Coder.of());
    final PCollection<Integer> two = PCollection.createPrimitiveOutputInternal(pipeline, WindowingStrategy.globalDefault(), IsBounded.UNBOUNDED, VarIntCoder.of());
    final PDone done = PDone.in(pipeline);
    final TupleTag<String> oneTag = new TupleTag<String>() {
    };
    final TupleTag<Integer> twoTag = new TupleTag<Integer>() {
    };
    final PCollectionTuple oneAndTwo = PCollectionTuple.of(oneTag, one).and(twoTag, two);
    // Consumes `one` as its main input and `two` as an additional input, so it depends on both.
    PTransform<PCollection<String>, PDone> multiConsumer = new PTransform<PCollection<String>, PDone>() {

        @Override
        public PDone expand(PCollection<String> input) {
            return done;
        }

        @Override
        public Map<TupleTag<?>, PValue> getAdditionalInputs() {
            return Collections.singletonMap(twoTag, two);
        }
    };
    // The consumer is pushed first, before any node has `one` or `two` as an output, so the
    // registration order of the nodes is not a topological order.
    hierarchy.pushNode("consumes_both", one, multiConsumer);
    hierarchy.setOutput(done);
    hierarchy.popNode();
    final PTransform<PBegin, PCollectionTuple> producer = new PTransform<PBegin, PCollectionTuple>() {

        @Override
        public PCollectionTuple expand(PBegin input) {
            return oneAndTwo;
        }
    };
    // Two nested pushes: "creates_one_and_two" is pushed while "encloses_producer" is still open,
    // and both are given `oneAndTwo` as their output.
    hierarchy.pushNode("encloses_producer", PBegin.in(pipeline), new PTransform<PBegin, PCollectionTuple>() {

        @Override
        public PCollectionTuple expand(PBegin input) {
            return input.apply(producer);
        }
    });
    hierarchy.pushNode("creates_one_and_two", PBegin.in(pipeline), producer);
    hierarchy.setOutput(oneAndTwo);
    hierarchy.popNode();
    hierarchy.setOutput(oneAndTwo);
    hierarchy.popNode();
    // A second consumer of the same values, this time registered after the producers.
    hierarchy.pushNode("second_copy_of_consumes_both", one, multiConsumer);
    hierarchy.setOutput(done);
    hierarchy.popNode();
    // Bookkeeping shared by the visitor callbacks below.
    final Set<Node> visitedNodes = new HashSet<>();
    final Set<Node> exitedNodes = new HashSet<>();
    final Set<PValue> visitedValues = new HashSet<>();
    hierarchy.visit(new PipelineVisitor.Defaults() {

        @Override
        public CompositeBehavior enterCompositeTransform(Node node) {
            // All inputs must already have been reported through visitValue.
            for (PValue input : node.getInputs().values()) {
                assertThat(visitedValues, hasItem(input));
            }
            assertThat("Nodes should not be visited more than once", visitedNodes, not(hasItem(node)));
            if (!node.isRootNode()) {
                assertThat("Nodes should always be visited after their enclosing nodes", visitedNodes, hasItem(node.getEnclosingNode()));
            }
            visitedNodes.add(node);
            return CompositeBehavior.ENTER_TRANSFORM;
        }

        @Override
        public void leaveCompositeTransform(Node node) {
            // A composite can only be left after it was entered, and only once.
            assertThat(visitedNodes, hasItem(node));
            if (!node.isRootNode()) {
                assertThat("Nodes should always be left before their enclosing nodes are left", exitedNodes, not(hasItem(node.getEnclosingNode())));
            }
            assertThat(exitedNodes, not(hasItem(node)));
            exitedNodes.add(node);
        }

        @Override
        public void visitPrimitiveTransform(Node node) {
            // The enclosing node must be entered but not yet left when a primitive is visited.
            assertThat(visitedNodes, hasItem(node.getEnclosingNode()));
            assertThat(exitedNodes, not(hasItem(node.getEnclosingNode())));
            assertThat("Nodes should not be visited more than once", visitedNodes, not(hasItem(node)));
            for (PValue input : node.getInputs().values()) {
                assertThat(visitedValues, hasItem(input));
            }
            visitedNodes.add(node);
        }

        @Override
        public void visitValue(PValue value, Node producer) {
            // Note: this parameter shadows the outer local `producer` (a PTransform).
            // A value is reported only after the node that produced it, and only once.
            assertThat(visitedNodes, hasItem(producer));
            assertThat(visitedValues, not(hasItem(value)));
            visitedValues.add(value);
        }
    });
    // Four nodes are pushed above; the fifth visited node is presumably the hierarchy's root,
    // and the two exited composites presumably the root and "encloses_producer" - TODO confirm.
    assertThat("Should have visited all the nodes", visitedNodes.size(), equalTo(5));
    assertThat("Should have left all of the visited composites", exitedNodes.size(), equalTo(2));
}
Also used : Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) Defaults(org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults) TupleTag(org.apache.beam.sdk.values.TupleTag) TaggedPValue(org.apache.beam.sdk.values.TaggedPValue) PValue(org.apache.beam.sdk.values.PValue) PBegin(org.apache.beam.sdk.values.PBegin) PCollection(org.apache.beam.sdk.values.PCollection) PDone(org.apache.beam.sdk.values.PDone) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PipelineVisitor(org.apache.beam.sdk.Pipeline.PipelineVisitor) PTransform(org.apache.beam.sdk.transforms.PTransform) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 32 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.

the class TransformHierarchyTest method replaceWithCompositeSucceeds.

/**
 * Registers an upstream node and an "Original" ParDo node, replaces the latter with a composite
 * that contains a multi-output ParDo, maps the replacement's output back onto the original
 * output, and asserts that both the contained node and the composite report the original output.
 */
@Test
public void replaceWithCompositeSucceeds() {
    final SingleOutput<Long, Long> incrementParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext context) {
            context.output(context.element() + 1L);
        }
    });
    GenerateSequence sequence = GenerateSequence.from(0);
    PCollection<Long> sequenceOutput = pipeline.apply(sequence);
    PCollection<Long> originalOutput = sequenceOutput.apply("Original", incrementParDo);

    // Mirror the two applications above in the hierarchy under test.
    hierarchy.pushNode("Upstream", pipeline.begin(), sequence);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(sequenceOutput);
    hierarchy.popNode();

    Node originalNode = hierarchy.pushNode("Original", sequenceOutput, incrementParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(originalOutput);
    hierarchy.popNode();

    // Replacement: a composite whose only content is a multi-output ParDo tagged with longsTag.
    final TupleTag<Long> longsTag = new TupleTag<>();
    final MultiOutput<Long, Long> containedParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext context) {
            context.output(context.element() + 1L);
        }
    }).withOutputTags(longsTag, TupleTagList.empty());
    PTransform<PCollection<Long>, PCollection<Long>> wrappingComposite =
            new PTransform<PCollection<Long>, PCollection<Long>>() {

                @Override
                public PCollection<Long> expand(PCollection<Long> input) {
                    PCollectionTuple containedOutputs = input.apply("Contained", containedParDo);
                    return containedOutputs.get(longsTag);
                }
            };
    PCollectionTuple containedOutput = sequenceOutput.apply("Contained", containedParDo);

    // replaceNode leaves the composite open; the contained node is pushed and popped inside it.
    Node compositeNode = hierarchy.replaceNode(originalNode, sequenceOutput, wrappingComposite);
    Node containedNode = hierarchy.pushNode("Original/Contained", sequenceOutput, containedParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(containedOutput);
    hierarchy.popNode();
    hierarchy.setOutput(containedOutput.get(longsTag));

    Entry<TupleTag<?>, PCollection<?>> onlyReplacement =
            (Map.Entry) Iterables.getOnlyElement(containedOutput.expand().entrySet());
    TaggedPValue originalTagged = TaggedPValue.ofExpandedValue(originalOutput);
    TaggedPValue replacementTagged =
            TaggedPValue.of(onlyReplacement.getKey(), onlyReplacement.getValue());
    hierarchy.replaceOutputs(
            Collections.singletonMap(
                    containedOutput.get(longsTag),
                    ReplacementOutput.of(originalTagged, replacementTagged)));

    // After the mapping, both nodes must expose the original output value.
    assertThat(containedNode.getOutputs().keySet(), Matchers.contains(onlyReplacement.getKey()));
    assertThat(containedNode.getOutputs().values(), Matchers.contains(originalOutput));
    assertThat(
            compositeNode.getOutputs().keySet(),
            equalTo(containedOutput.get(longsTag).expand().keySet()));
    assertThat(compositeNode.getOutputs().values(), Matchers.contains(originalOutput));
    hierarchy.popNode();
}
Also used : Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) TupleTag(org.apache.beam.sdk.values.TupleTag) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) PCollection(org.apache.beam.sdk.values.PCollection) Entry(java.util.Map.Entry) DoFn(org.apache.beam.sdk.transforms.DoFn) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PTransform(org.apache.beam.sdk.transforms.PTransform) Test(org.junit.Test)

Example 33 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.

the class TransformTreeTest method testCompositeCapture.

/**
 * Builds a read, sample, flatten, write pipeline and traverses it topologically, checking that
 * the {@code Sample} transform and {@link WriteFiles} are entered as composite nodes with an
 * enclosing node, that {@code Impulse} is never entered as a composite, and that
 * {@code Combine.Globally}, {@code WriteFiles} and {@code TextIO.Read} are never reported as
 * primitives.
 *
 * @throws Exception if the temporary input or output file cannot be created
 */
@Test
public void testCompositeCapture() throws Exception {
    // The pipeline below is only traversed and never run in this test.
    p.enableAbandonedNodeEnforcement(false);
    File inputFile = tmpFolder.newFile();
    File outputFile = tmpFolder.newFile();
    final PTransform<PCollection<String>, PCollection<Iterable<String>>> sample = Sample.fixedSizeGlobally(10);
    p.apply("ReadMyFile", TextIO.read().from(inputFile.getPath())).apply(sample).apply(Flatten.iterables()).apply("WriteMyFile", TextIO.write().to(outputFile.getPath()));
    // `visited` records which kinds of transform were seen; `left` records which composites were left.
    final EnumSet<TransformsSeen> visited = EnumSet.noneOf(TransformsSeen.class);
    final EnumSet<TransformsSeen> left = EnumSet.noneOf(TransformsSeen.class);
    p.traverseTopologically(new Pipeline.PipelineVisitor.Defaults() {

        @Override
        public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
            if (node.isRootNode()) {
                return CompositeBehavior.ENTER_TRANSFORM;
            }
            PTransform<?, ?> transform = node.getTransform();
            // EnumSet.add returns false on a repeat, so assertTrue also checks "entered only once".
            if (sample.getClass().equals(transform.getClass())) {
                assertTrue(visited.add(TransformsSeen.SAMPLE));
                assertNotNull(node.getEnclosingNode());
                assertTrue(node.isCompositeNode());
            } else if (transform instanceof WriteFiles) {
                assertTrue(visited.add(TransformsSeen.WRITE));
                assertNotNull(node.getEnclosingNode());
                assertTrue(node.isCompositeNode());
            }
            assertThat(transform, not(instanceOf(Impulse.class)));
            return CompositeBehavior.ENTER_TRANSFORM;
        }

        @Override
        public void leaveCompositeTransform(TransformHierarchy.Node node) {
            PTransform<?, ?> transform = node.getTransform();
            // The root check comes first so `transform` is only dereferenced for non-root nodes.
            if (!node.isRootNode() && transform.getClass().equals(sample.getClass())) {
                assertTrue(left.add(TransformsSeen.SAMPLE));
            }
        }

        @Override
        public void visitPrimitiveTransform(TransformHierarchy.Node node) {
            PTransform<?, ?> transform = node.getTransform();
            // Composites should not be visited here.
            assertThat(transform, not(instanceOf(Combine.Globally.class)));
            assertThat(transform, not(instanceOf(WriteFiles.class)));
            assertThat(transform, not(instanceOf(TextIO.Read.class)));
            // There are multiple impulses in the graph so we don't validate that we haven't
            // seen one before.
            visited.add(TransformsSeen.IMPULSE);
        }
    });
    // JUnit's assertEquals takes (expected, actual); this order keeps failure messages accurate.
    assertEquals(EnumSet.allOf(TransformsSeen.class), visited);
    assertEquals(EnumSet.of(TransformsSeen.SAMPLE), left);
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) WriteFiles(org.apache.beam.sdk.io.WriteFiles) File(java.io.File) Impulse(org.apache.beam.sdk.transforms.Impulse) PTransform(org.apache.beam.sdk.transforms.PTransform) Test(org.junit.Test)

Example 34 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.

the class DeadLetteredTransformTest method testDeadLettersOnlyFailures.

/**
 * Runs two elements through a {@code DeadLetteredTransform} whose function throws for
 * {@code 10L}, and checks that only {@code 20L} reaches the main output while exactly one
 * failure, carrying the encoded {@code 10L} and the failure key, is captured by the sink.
 *
 * @throws Exception if running the pipeline or decoding the captured payload fails
 */
@Test
@Category(NeedsRunner.class)
public void testDeadLettersOnlyFailures() throws Exception {
    resetFailures();

    // Identity function that rejects the value 10.
    SimpleFunction<Long, Long> failOnTen = SimpleFunction.fromSerializableFunctionWithOutputType(x -> {
        if (x == 10L) {
            throw new RuntimeException(FAILURE_KEY);
        }
        return x;
    }, TypeDescriptor.of(Long.class));

    // Dead-letter sink: hands every failure to capture() and produces no output.
    PTransform<PCollection<Failure>, PDone> failureSink = new PTransform<PCollection<Failure>, PDone>() {

        @Override
        public PDone expand(PCollection<Failure> input) {
            input.apply(MapElements.into(TypeDescriptor.of(Void.class)).via(failure -> {
                capture(failure);
                return null;
            }));
            return PDone.in(input.getPipeline());
        }
    };

    PCollection<Long> elements = p.apply(Create.of(10L, 20L).withCoder(VarLongCoder.of()));
    PCollection<Long> results = elements.apply(new DeadLetteredTransform<>(failOnTen, failureSink));
    PAssert.that(results).containsInAnyOrder(20L);
    p.run().waitUntilFinish();

    List<Failure> captured = getFailures();
    assertEquals(1, captured.size());
    Failure onlyFailure = captured.get(0);
    Long decodedPayload = VarLongCoder.of().decode(new ByteArrayInputStream(onlyFailure.getPayload()));
    assertEquals(10L, decodedPayload.longValue());
    assertTrue(onlyFailure.getError().contains(FAILURE_KEY));
}
Also used : MapElements(org.apache.beam.sdk.transforms.MapElements) TypeDescriptor(org.apache.beam.sdk.values.TypeDescriptor) PDone(org.apache.beam.sdk.values.PDone) PAssert(org.apache.beam.sdk.testing.PAssert) RunWith(org.junit.runner.RunWith) Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) VarLongCoder(org.apache.beam.sdk.coders.VarLongCoder) NeedsRunner(org.apache.beam.sdk.testing.NeedsRunner) SimpleFunction(org.apache.beam.sdk.transforms.SimpleFunction) PCollection(org.apache.beam.sdk.values.PCollection) Category(org.junit.experimental.categories.Category) ArrayList(java.util.ArrayList) PTransform(org.apache.beam.sdk.transforms.PTransform) List(java.util.List) Rule(org.junit.Rule) ByteArrayInputStream(java.io.ByteArrayInputStream) Create(org.apache.beam.sdk.transforms.Create) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Assert.assertEquals(org.junit.Assert.assertEquals)

Example 35 with PTransform

use of org.apache.beam.sdk.transforms.PTransform in project beam by apache.

the class DisplayDataTest method testTypicalUsage.

/**
 * Extracts display data from a transform that includes two sub-components and adds items of
 * several kinds, then checks that the result is non-empty and that every item has a non-empty
 * key and value, a non-null type, and a namespace equal to the class of the transform or of one
 * of the two sub-components.
 */
@Test
public void testTypicalUsage() {
    final HasDisplayData subComponent1 = new HasDisplayData() {

        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
            builder.add(DisplayData.item("ExpectedAnswer", 42));
        }
    };
    final HasDisplayData subComponent2 = new HasDisplayData() {

        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
            builder
                    .add(DisplayData.item("Location", "Seattle"))
                    .add(DisplayData.item("Forecast", "Rain"));
        }
    };
    PTransform<?, ?> describedTransform = new PTransform<PCollection<String>, PCollection<String>>() {

        final Instant defaultStartTime = new Instant(0);

        // Left at the default value, so the addIfNotDefault call below compares equal values.
        Instant startTime = defaultStartTime;

        @Override
        public PCollection<String> expand(PCollection<String> input) {
            // Only display data is read from this transform; it is never applied to a pipeline.
            throw new IllegalArgumentException("Should never be applied");
        }

        @Override
        public void populateDisplayData(DisplayData.Builder builder) {
            builder
                    .include("p1", subComponent1)
                    .include("p2", subComponent2)
                    .add(DisplayData.item("minSproggles", 200).withLabel("Minimum Required Sproggles"))
                    .add(DisplayData.item("fireLasers", true))
                    .addIfNotDefault(DisplayData.item("startTime", startTime), defaultStartTime)
                    .add(DisplayData.item("timeBomb", Instant.now().plus(Duration.standardDays(1))))
                    .add(DisplayData.item("filterLogic", subComponent1.getClass()))
                    .add(DisplayData.item("serviceUrl", "google.com/fizzbang").withLinkUrl("http://www.google.com/fizzbang"));
        }
    };

    DisplayData displayData = DisplayData.from(describedTransform);

    assertThat(displayData.items(), not(empty()));
    assertThat(
            displayData.items(),
            everyItem(
                    allOf(
                            hasKey(not(isEmptyOrNullString())),
                            hasNamespace(
                                    Matchers.<Class<?>>isOneOf(
                                            describedTransform.getClass(),
                                            subComponent1.getClass(),
                                            subComponent2.getClass())),
                            hasType(notNullValue(DisplayData.Type.class)),
                            hasValue(not(isEmptyOrNullString())))));
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) Instant(org.joda.time.Instant) Matchers.isEmptyOrNullString(org.hamcrest.Matchers.isEmptyOrNullString) PTransform(org.apache.beam.sdk.transforms.PTransform) Test(org.junit.Test)

Aggregations

PTransform (org.apache.beam.sdk.transforms.PTransform) 41, PCollection (org.apache.beam.sdk.values.PCollection) 29, Test (org.junit.Test) 18, AppliedPTransform (org.apache.beam.sdk.runners.AppliedPTransform) 11, PBegin (org.apache.beam.sdk.values.PBegin) 11, IOException (java.io.IOException) 10, ArrayList (java.util.ArrayList) 10, List (java.util.List) 10, Map (java.util.Map) 10, TupleTag (org.apache.beam.sdk.values.TupleTag) 10, DoFn (org.apache.beam.sdk.transforms.DoFn) 9, Coder (org.apache.beam.sdk.coders.Coder) 8, Create (org.apache.beam.sdk.transforms.Create) 8, ParDo (org.apache.beam.sdk.transforms.ParDo) 7, PDone (org.apache.beam.sdk.values.PDone) 7, PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple) 6, Collection (java.util.Collection) 5, HashMap (java.util.HashMap) 5, Collectors.toList (java.util.stream.Collectors.toList) 5, Schema (org.apache.beam.sdk.schemas.Schema) 5