Search in sources:

Example 11 with PCollectionTuple

use of org.apache.beam.sdk.values.PCollectionTuple in project beam by apache.

Source: class ParDoTest, method testMultiOutputAppliedMultipleTimesDifferentOutputs.

@Test
public void testMultiOutputAppliedMultipleTimesDifferentOutputs() {
    pipeline.enableAbandonedNodeEnforcement(false);
    PCollection<Long> longs = pipeline.apply(GenerateSequence.from(0));

    // One main output tag plus two additional, differently-typed output tags.
    TupleTag<Long> mainOut = new TupleTag<>();
    final TupleTag<String> valueAsString = new TupleTag<>();
    final TupleTag<Integer> valueAsInt = new TupleTag<>();

    // Emits each element on the main output and a converted copy on each additional output.
    DoFn<Long, Long> fn =
        new DoFn<Long, Long>() {
            @ProcessElement
            public void processElement(ProcessContext c) {
                c.output(c.element());
                c.output(valueAsString, Long.toString(c.element()));
                c.output(valueAsInt, Long.valueOf(c.element()).intValue());
            }
        };
    ParDo.MultiOutput<Long, Long> parDo =
        ParDo.of(fn).withOutputTags(mainOut, TupleTagList.of(valueAsString).and(valueAsInt));

    // Applying the same transform twice must produce two distinct tuples,
    // each of which exposes all three tags.
    PCollectionTuple first = longs.apply("first", parDo);
    PCollectionTuple second = longs.apply("second", parDo);
    assertThat(first, not(equalTo(second)));
    assertThat(
        first.getAll().keySet(),
        Matchers.<TupleTag<?>>containsInAnyOrder(mainOut, valueAsString, valueAsInt));
    assertThat(
        second.getAll().keySet(),
        Matchers.<TupleTag<?>>containsInAnyOrder(mainOut, valueAsString, valueAsInt));
}
Also used : TupleTag(org.apache.beam.sdk.values.TupleTag) StringUtils.byteArrayToJsonString(org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) Matchers.containsString(org.hamcrest.Matchers.containsString) UsesStatefulParDo(org.apache.beam.sdk.testing.UsesStatefulParDo) UsesTimersInParDo(org.apache.beam.sdk.testing.UsesTimersInParDo) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Test(org.junit.Test)

Example 12 with PCollectionTuple

use of org.apache.beam.sdk.values.PCollectionTuple in project beam by apache.

Source: class ParDoTest, method testMultiOutputChaining.

@Test
@Category(NeedsRunner.class)
public void testMultiOutputChaining() {
    // Split the input into two tagged outputs via a multi-output composite.
    PCollectionTuple filters =
        pipeline.apply(Create.of(Arrays.asList(3, 4, 5, 6))).apply(new MultiFilter());

    // Chain a second filter onto each tagged branch; only 6 survives both filters.
    PCollection<Integer> by2then3 =
        filters.get(MultiFilter.BY2).apply("Filter3sAgain", ParDo.of(new MultiFilter.FilterFn(3)));
    PCollection<Integer> by3then2 =
        filters.get(MultiFilter.BY3).apply("Filter2sAgain", ParDo.of(new MultiFilter.FilterFn(2)));

    PAssert.that(by2then3).containsInAnyOrder(6);
    PAssert.that(by3then2).containsInAnyOrder(6);
    pipeline.run();
}
Also used : PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 13 with PCollectionTuple

use of org.apache.beam.sdk.values.PCollectionTuple in project beam by apache.

Source: class ReplacementOutputsTest, method taggedExtraReplacementThrows.

@Test
public void taggedExtraReplacementThrows() {
    // The replacement tuple carries moreIntsTag, which the original tuple lacks,
    // so ReplacementOutputs.tagged must reject it.
    PCollectionTuple original = PCollectionTuple.of(intsTag, ints).and(strsTag, strs);
    PCollectionTuple replacements =
        PCollectionTuple.of(strsTag, replacementStrs)
            .and(moreIntsTag, moreReplacementInts)
            .and(intsTag, replacementInts);

    thrown.expect(IllegalArgumentException.class);
    thrown.expectMessage("Missing original output");
    thrown.expectMessage(moreIntsTag.toString());
    thrown.expectMessage(moreReplacementInts.toString());
    ReplacementOutputs.tagged(original.expand(), replacements);
}
Also used : PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) Test(org.junit.Test)

Example 14 with PCollectionTuple

use of org.apache.beam.sdk.values.PCollectionTuple in project beam by apache.

Source: class PTransformTranslationTest, method multiMultiParDo.

private static AppliedPTransform<?, ?, ?> multiMultiParDo(Pipeline pipeline) {
    // A singleton side-input view plus an unbounded main input.
    PCollectionView<String> view =
        pipeline.apply(Create.of("foo")).apply(View.<String>asSingleton());
    PCollection<Long> input = pipeline.apply(GenerateSequence.from(0));

    // Multi-output ParDo with one side input, one main output tag, and one additional tag.
    ParDo.MultiOutput<Long, KV<Long, String>> parDo =
        ParDo.of(new TestDoFn())
            .withSideInputs(view)
            .withOutputTags(
                new TupleTag<KV<Long, String>>() {},
                TupleTagList.of(new TupleTag<KV<String, Long>>() {}));
    PCollectionTuple output = input.apply(parDo);

    // The applied transform's inputs combine the side input's additional inputs
    // with the expanded main input.
    Map<TupleTag<?>, PValue> inputs = new HashMap<>(parDo.getAdditionalInputs());
    inputs.putAll(input.expand());
    return AppliedPTransform
        .<PCollection<Long>, PCollectionTuple, ParDo.MultiOutput<Long, KV<Long, String>>>of(
            "MultiParDoInAndOut", inputs, output.expand(), parDo, pipeline);
}
Also used : HashMap(java.util.HashMap) TupleTag(org.apache.beam.sdk.values.TupleTag) KV(org.apache.beam.sdk.values.KV) PValue(org.apache.beam.sdk.values.PValue) PCollection(org.apache.beam.sdk.values.PCollection) ParDo(org.apache.beam.sdk.transforms.ParDo) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple)

Example 15 with PCollectionTuple

use of org.apache.beam.sdk.values.PCollectionTuple in project beam by apache.

Source: class CreateStreamTest, method testMultiOutputParDo.

/**
   * Test multiple output {@link ParDo} in streaming pipelines.
   * This is currently needed as a test for https://issues.apache.org/jira/browse/BEAM-2029 since
   * {@link org.apache.beam.sdk.testing.ValidatesRunner} tests do not currently run for Spark runner
   * in streaming mode.
   */
@Test
public void testMultiOutputParDo() throws IOException {
    // One batch carrying 1, 2, 3 at timestamp 0, then the watermark advances to infinity.
    Instant instant = new Instant(0);
    CreateStream<Integer> source =
        CreateStream.of(VarIntCoder.of(), batchDuration())
            .emptyBatch()
            .advanceWatermarkForNextBatch(instant.plus(Duration.standardMinutes(5)))
            .nextBatch(
                TimestampedValue.of(1, instant),
                TimestampedValue.of(2, instant),
                TimestampedValue.of(3, instant))
            .advanceNextBatchWatermarkToInfinity();
    PCollection<Integer> inputs = p.apply(source);

    final TupleTag<Integer> mainTag = new TupleTag<>();
    final TupleTag<Integer> additionalTag = new TupleTag<>();

    // Emit each element unchanged on the main output and element + 1 on the additional output.
    PCollectionTuple outputs =
        inputs.apply(
            ParDo.of(
                    new DoFn<Integer, Integer>() {
                        @SuppressWarnings("unused")
                        @ProcessElement
                        public void process(ProcessContext context) {
                            Integer value = context.element();
                            context.output(value);
                            context.output(additionalTag, value + 1);
                        }
                    })
                .withOutputTags(mainTag, TupleTagList.of(additionalTag)));

    PAssert.that(outputs.get(mainTag).setCoder(VarIntCoder.of())).containsInAnyOrder(1, 2, 3);
    PAssert.that(outputs.get(additionalTag).setCoder(VarIntCoder.of())).containsInAnyOrder(2, 3, 4);
    p.run();
}
Also used : Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple) StreamingTest(org.apache.beam.runners.spark.StreamingTest) Test(org.junit.Test)

Aggregations

PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple)31 TupleTag (org.apache.beam.sdk.values.TupleTag)27 Test (org.junit.Test)26 Category (org.junit.experimental.categories.Category)13 StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString)8 Matchers.containsString (org.hamcrest.Matchers.containsString)8 KV (org.apache.beam.sdk.values.KV)6 PCollection (org.apache.beam.sdk.values.PCollection)5 PCollectionView (org.apache.beam.sdk.values.PCollectionView)4 PValue (org.apache.beam.sdk.values.PValue)4 Pipeline (org.apache.beam.sdk.Pipeline)3 ValueState (org.apache.beam.sdk.state.ValueState)3 DoFn (org.apache.beam.sdk.transforms.DoFn)3 TupleTagList (org.apache.beam.sdk.values.TupleTagList)3 Instant (org.joda.time.Instant)3 TableRow (com.google.api.services.bigquery.model.TableRow)2 List (java.util.List)2 Map (java.util.Map)2 KeyedWorkItem (org.apache.beam.runners.core.KeyedWorkItem)2 StatefulParDo (org.apache.beam.runners.direct.ParDoMultiOverrideFactory.StatefulParDo)2