Search in sources :

Example 36 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class PipelineTest method testTupleInjectionTransform.

/**
 * Verifies that a transform can place the elements of its input into a
 * {@link PCollectionTuple} under a caller-supplied {@link TupleTag}, and that the same elements
 * can then be read back out of the tuple through that tag.
 */
@Test
@Category(ValidatesRunner.class)
public void testTupleInjectionTransform() throws Exception {
    // Source collection holding the four integers the assertion expects back.
    PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3, 4));
    TupleTag<Integer> injectedTag = new TupleTag<>();

    // The transform under test produces a tuple rather than a plain PCollection.
    PCollectionTuple wrapped = numbers.apply("ProjectTag", new TupleInjectionTransform<>(injectedTag));

    // Look the collection up by the tag that was handed to the transform.
    PCollection<Integer> tagged = wrapped.get(injectedTag);
    PAssert.that(tagged).containsInAnyOrder(1, 2, 3, 4);

    pipeline.run();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 37 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class TransformHierarchyTest method visitAfterReplace.

/**
 * Tests that visiting the {@link TransformHierarchy} after replacing nodes does not visit any of
 * the original nodes or inaccessible values but does visit all of the replacement nodes, new
 * inaccessible replacement values, and the original output values.
 *
 * <p>The hierarchy is populated by hand (push / finishSpecifyingInput / setOutput / pop) rather
 * than through pipeline application. As asserted at the end of the test, the set returned by
 * {@code hierarchy.visit(...)} contains exactly {@code upstream} and the original {@code output}.
 */
@Test
public void visitAfterReplace() {
    // Captured up front; the root is expected among the visited composite nodes below.
    Node root = hierarchy.getCurrent();
    // Original transform: a single-output ParDo that emits each element plus one.
    final SingleOutput<Long, Long> originalParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    });
    // Apply the source and the original ParDo to the pipeline to obtain the PCollections that
    // are used as node inputs and outputs in the hand-built hierarchy.
    GenerateSequence genUpstream = GenerateSequence.from(0);
    PCollection<Long> upstream = pipeline.apply(genUpstream);
    PCollection<Long> output = upstream.apply("Original", originalParDo);
    // Record the "Upstream" node: push, finish its input, set its output, pop.
    Node upstreamNode = hierarchy.pushNode("Upstream", pipeline.begin(), genUpstream);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(upstream);
    hierarchy.popNode();
    // Record the "Original" node, which consumes `upstream` and produces `output`.
    Node original = hierarchy.pushNode("Original", upstream, originalParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(output);
    hierarchy.popNode();
    // Replacement transform: the same element-plus-one DoFn, but as a multi-output ParDo created
    // with `longs` and an empty list of additional output tags.
    final TupleTag<Long> longs = new TupleTag<>();
    final MultiOutput<Long, Long> replacementParDo = ParDo.of(new DoFn<Long, Long>() {

        @ProcessElement
        public void processElement(ProcessContext ctxt) {
            ctxt.output(ctxt.element() + 1L);
        }
    }).withOutputTags(longs, TupleTagList.empty());
    // Composite wrapper whose expansion applies the replacement ParDo and selects the `longs`
    // collection from the resulting tuple.
    PTransform<PCollection<Long>, PCollection<Long>> replacementComposite = new PTransform<PCollection<Long>, PCollection<Long>>() {

        @Override
        public PCollection<Long> expand(PCollection<Long> input) {
            return input.apply("Contained", replacementParDo).get(longs);
        }
    };
    // Apply the replacement ParDo directly to obtain the tuple used as the contained node's output.
    PCollectionTuple replacementOutput = upstream.apply("Contained", replacementParDo);
    // Replace the "Original" node with the composite.
    // NOTE(review): the two popNode() calls that follow (one for the contained ParDo, one after
    // replaceOutputs) suggest replaceNode leaves the composite as the current node - confirm
    // against TransformHierarchy.
    Node compositeNode = hierarchy.replaceNode(original, upstream, replacementComposite);
    // Record the replacement ParDo as a node named "Original/Contained".
    Node replacementParNode = hierarchy.pushNode("Original/Contained", upstream, replacementParDo);
    hierarchy.finishSpecifyingInput();
    hierarchy.setOutput(replacementOutput);
    hierarchy.popNode();
    // Called after the contained node has been popped, so this presumably sets the output of the
    // composite itself - confirm.
    hierarchy.setOutput(replacementOutput.get(longs));
    // Raw (unchecked) cast so the expanded tuple can be read as tag -> PCollection entries.
    Map<TupleTag<?>, PCollection<?>> expandedReplacementOutput = (Map) replacementOutput.expand();
    // getOnlyElement requires the expansion to hold exactly one entry.
    Entry<TupleTag<?>, PCollection<?>> replacementLongs = Iterables.getOnlyElement(expandedReplacementOutput.entrySet());
    // Keyed by the replacement's `longs` collection; the ReplacementOutput pairs the original
    // `output` value with the replacement's tagged value.
    hierarchy.replaceOutputs(Collections.singletonMap(replacementOutput.get(longs), ReplacementOutput.of(TaggedPValue.ofExpandedValue(output), TaggedPValue.of(replacementLongs.getKey(), replacementLongs.getValue()))));
    hierarchy.popNode();
    // Walk the hierarchy, recording every composite that is entered and every primitive that is
    // visited; visit(...) itself returns a Set<PValue>.
    final Set<Node> visitedCompositeNodes = new HashSet<>();
    final Set<Node> visitedPrimitiveNodes = new HashSet<>();
    Set<PValue> visitedValues = hierarchy.visit(new Defaults() {

        @Override
        public CompositeBehavior enterCompositeTransform(Node node) {
            visitedCompositeNodes.add(node);
            return CompositeBehavior.ENTER_TRANSFORM;
        }

        @Override
        public void visitPrimitiveTransform(Node node) {
            visitedPrimitiveNodes.add(node);
        }
    });
    /*
    Final Graph:
    Upstream -> Upstream.out -> Composite -> (ReplacementParDo -> OriginalParDo.out)
    */
    // Composites entered: only the root and the replacement composite.
    assertThat(visitedCompositeNodes, containsInAnyOrder(root, compositeNode));
    // Primitives visited: the upstream source and the replacement ParDo; `original` is absent.
    assertThat(visitedPrimitiveNodes, containsInAnyOrder(upstreamNode, replacementParNode));
    // Values: only `upstream` and the original `output`.
    assertThat(visitedValues, containsInAnyOrder(upstream, output));
}
Also used : Node(org.apache.beam.sdk.runners.TransformHierarchy.Node) TupleTag(org.apache.beam.sdk.values.TupleTag) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) PTransform(org.apache.beam.sdk.transforms.PTransform) HashSet(java.util.HashSet) TaggedPValue(org.apache.beam.sdk.values.TaggedPValue) PValue(org.apache.beam.sdk.values.PValue) GenerateSequence(org.apache.beam.sdk.io.GenerateSequence) PCollection(org.apache.beam.sdk.values.PCollection) DoFn(org.apache.beam.sdk.transforms.DoFn) Defaults(org.apache.beam.sdk.Pipeline.PipelineVisitor.Defaults) Map(java.util.Map) Test(org.junit.Test)

Example 38 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class SqlTransform method toTableMap.

/**
 * Builds the table-name to {@link BeamSqlTable} mapping for the given transform input.
 *
 * <p>A single {@link PCollection} becomes one table registered under {@code PCOLLECTION_NAME}.
 * Any other input type is expanded, and each expanded value is registered as a table named
 * after the id of its {@link TupleTag}.
 *
 * @param inputs the input the transform was applied to
 * @return an immutable map from table name to a table backed by the corresponding collection
 */
@SuppressWarnings("unchecked")
private Map<String, BeamSqlTable> toTableMap(PInput inputs) {
    if (!(inputs instanceof PCollection)) {
        // Composite input: one table per expanded entry, keyed by the tag id.
        ImmutableMap.Builder<String, BeamSqlTable> byTagId = ImmutableMap.builder();
        for (Map.Entry<TupleTag<?>, PValue> tagged : inputs.expand().entrySet()) {
            String tableName = tagged.getKey().getId();
            PCollection<?> rows = (PCollection<?>) tagged.getValue();
            byTagId.put(tableName, new BeamPCollectionTable(rows));
        }
        return byTagId.build();
    }
    // Lone collection: exposed under the fixed default table name.
    PCollection<?> only = (PCollection<?>) inputs;
    return ImmutableMap.of(PCOLLECTION_NAME, new BeamPCollectionTable(only));
}
Also used : PCollection(org.apache.beam.sdk.values.PCollection) BeamSqlTable(org.apache.beam.sdk.extensions.sql.meta.BeamSqlTable) BeamPCollectionTable(org.apache.beam.sdk.extensions.sql.impl.schema.BeamPCollectionTable) TupleTag(org.apache.beam.sdk.values.TupleTag) PValue(org.apache.beam.sdk.values.PValue) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(org.apache.beam.vendor.calcite.v1_28_0.com.google.common.collect.ImmutableMap)

Example 39 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class BeamSqlDslFilterTest method runCompositeFilter.

/**
 * Runs a filter that combines {@code AND} with a parenthesized {@code OR} against
 * {@code TABLE_A} and asserts that the result is exactly the rows at indexes 1 and 2 of
 * {@code rowsInTableA}.
 *
 * @param input the rows registered with the query under the tag {@code TABLE_A}
 * @throws Exception propagated from building or running the pipeline
 */
private void runCompositeFilter(PCollection<Row> input) throws Exception {
    String query = "SELECT * FROM TABLE_A" + " WHERE f_int > 1 AND (f_long < 3000 OR f_string = 'string_row3')";

    // Expose the input to SQL under the table name used in the query.
    PCollectionTuple tables = PCollectionTuple.of(new TupleTag<>("TABLE_A"), input);
    PCollection<Row> filtered = tables.apply("testCompositeFilter", SqlTransform.query(query));

    PAssert.that(filtered).containsInAnyOrder(rowsInTableA.get(1), rowsInTableA.get(2));

    // Block until the pipeline has completed.
    pipeline.run().waitUntilFinish();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) Matchers.containsString(org.hamcrest.Matchers.containsString) Row(org.apache.beam.sdk.values.Row)

Example 40 with TupleTag

use of org.apache.beam.sdk.values.TupleTag in project beam by apache.

the class BeamSqlDslFilterTest method runNoReturnFilter.

/**
 * Runs the filter {@code f_int < 1} against {@code TABLE_A} and asserts that the resulting
 * collection is empty.
 *
 * @param input the rows registered with the query under the tag {@code TABLE_A}
 * @throws Exception propagated from building or running the pipeline
 */
private void runNoReturnFilter(PCollection<Row> input) throws Exception {
    String query = "SELECT * FROM TABLE_A WHERE f_int < 1";

    // Expose the input to SQL under the table name used in the query.
    PCollectionTuple tables = PCollectionTuple.of(new TupleTag<>("TABLE_A"), input);
    PCollection<Row> filtered = tables.apply("testNoReturnFilter", SqlTransform.query(query));

    PAssert.that(filtered).empty();

    // Block until the pipeline has completed.
    pipeline.run().waitUntilFinish();
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) Matchers.containsString(org.hamcrest.Matchers.containsString) Row(org.apache.beam.sdk.values.Row)

Aggregations

TupleTag (org.apache.beam.sdk.values.TupleTag): 185; Test (org.junit.Test): 100; WindowedValue (org.apache.beam.sdk.util.WindowedValue): 54; KV (org.apache.beam.sdk.values.KV): 54; PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple): 49; PCollection (org.apache.beam.sdk.values.PCollection): 42; DoFn (org.apache.beam.sdk.transforms.DoFn): 32; Instant (org.joda.time.Instant): 32; SerializablePipelineOptions (org.apache.beam.runners.core.construction.SerializablePipelineOptions): 30; Map (java.util.Map): 29; Pipeline (org.apache.beam.sdk.Pipeline): 29; PCollectionView (org.apache.beam.sdk.values.PCollectionView): 29; HashMap (java.util.HashMap): 27; Coder (org.apache.beam.sdk.coders.Coder): 26; StreamRecordStripper.stripStreamRecordFromWindowedValue (org.apache.beam.runners.flink.translation.wrappers.streaming.StreamRecordStripper.stripStreamRecordFromWindowedValue): 25; Matchers.containsString (org.hamcrest.Matchers.containsString): 25; List (java.util.List): 24; BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 23; KvCoder (org.apache.beam.sdk.coders.KvCoder): 22; KeyedOneInputStreamOperatorTestHarness (org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness): 22