Search in sources :

Example 81 with KV

use of org.apache.beam.sdk.values.KV in project beam by apache.

the class ViewTest method testWindowedMultimapAsEntrySetSideInput.

/**
 * Exercises a windowed multimap side input that is read through {@code size()} and
 * {@code entrySet()}.
 *
 * <p>Both the side input and the main input are timestamped and assigned to 10 ms fixed
 * windows. Each main-input integer is the number of keys the DoFn asserts it can see in the
 * side input; every (key, value) pair found there is re-emitted and verified with
 * {@code PAssert}.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedMultimapAsEntrySetSideInput() {
    // Side input: timestamped key/value pairs, windowed, then exposed as a multimap view.
    PCollection<KV<String, Integer>> sideElements =
            pipeline.apply(
                    "CreateSideInput",
                    Create.timestamped(
                            TimestampedValue.of(KV.of("a", 1), new Instant(1)),
                            TimestampedValue.of(KV.of("a", 2), new Instant(7)),
                            TimestampedValue.of(KV.of("b", 3), new Instant(14))));
    PCollection<KV<String, Integer>> windowedSide =
            sideElements.apply(
                    "SideWindowInto",
                    Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));
    final PCollectionView<Map<String, Iterable<Integer>>> view =
            windowedSide.apply(View.<String, Integer>asMultimap());

    // Main input: each element is the key count expected from the side input.
    PCollection<Integer> expectedSizes =
            pipeline.apply(
                    "CreateMainInput",
                    Create.timestamped(
                            TimestampedValue.of(1 /* size */, new Instant(5)),
                            TimestampedValue.of(1 /* size */, new Instant(16))));
    PCollection<Integer> windowedMain =
            expectedSizes.apply(
                    "MainWindowInto",
                    Window.<Integer>into(FixedWindows.of(Duration.millis(10))));

    PCollection<KV<String, Integer>> output =
            windowedMain.apply(
                    "OutputSideInputs",
                    ParDo.of(new DoFn<Integer, KV<String, Integer>>() {

                        @ProcessElement
                        public void processElement(ProcessContext c) {
                            // The map itself and its entry set must both report the expected size.
                            final int expectedMapSize = c.element();
                            final int actualMapSize = c.sideInput(view).size();
                            assertEquals(expectedMapSize, actualMapSize);

                            final int expectedEntryCount = c.element();
                            final int actualEntryCount = c.sideInput(view).entrySet().size();
                            assertEquals(expectedEntryCount, actualEntryCount);

                            // Flatten the multimap back into individual key/value pairs.
                            for (Entry<String, Iterable<Integer>> keyed : c.sideInput(view).entrySet()) {
                                for (Integer number : keyed.getValue()) {
                                    c.output(KV.of(keyed.getKey(), number));
                                }
                            }
                        }
                    }).withSideInputs(view));

    PAssert.that(output).containsInAnyOrder(KV.of("a", 1), KV.of("a", 2), KV.of("b", 3));
    pipeline.run();
}
Also used : Entry(java.util.Map.Entry) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Map(java.util.Map) HashMap(java.util.HashMap) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 82 with KV

use of org.apache.beam.sdk.values.KV in project beam by apache.

the class ViewTest method testWindowedMultimapSideInputWithNonDeterministicKeyCoder.

/**
 * Exercises a windowed multimap side input whose elements are encoded with a
 * {@code KvCoder} built from {@code NonDeterministicStringCoder} and {@code VarIntCoder}.
 *
 * <p>Each main-input word looks up the side-input values stored under its first letter and
 * emits one (word, value) pair per value found. Both inputs use 10 ms fixed windows.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedMultimapSideInputWithNonDeterministicKeyCoder() {
    // Side input: timestamped pairs with an explicitly supplied coder, windowed, then viewed.
    PCollection<KV<String, Integer>> sideElements =
            pipeline.apply(
                    "CreateSideInput",
                    Create.timestamped(
                            TimestampedValue.of(KV.of("a", 1), new Instant(1)),
                            TimestampedValue.of(KV.of("a", 2), new Instant(7)),
                            TimestampedValue.of(KV.of("b", 3), new Instant(14)))
                            .withCoder(KvCoder.of(new NonDeterministicStringCoder(), VarIntCoder.of())));
    PCollection<KV<String, Integer>> windowedSide =
            sideElements.apply(
                    "SideWindowInto",
                    Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));
    final PCollectionView<Map<String, Iterable<Integer>>> view =
            windowedSide.apply(View.<String, Integer>asMultimap());

    // Main input: words whose first letter is the lookup key.
    PCollection<String> words =
            pipeline.apply(
                    "CreateMainInput",
                    Create.timestamped(
                            TimestampedValue.of("apple", new Instant(5)),
                            TimestampedValue.of("banana", new Instant(13)),
                            TimestampedValue.of("blackberry", new Instant(16))));
    PCollection<String> windowedWords =
            words.apply(
                    "MainWindowInto",
                    Window.<String>into(FixedWindows.of(Duration.millis(10))));

    PCollection<KV<String, Integer>> output =
            windowedWords.apply(
                    "OutputSideInputs",
                    ParDo.of(new DoFn<String, KV<String, Integer>>() {

                        @ProcessElement
                        public void processElement(ProcessContext c) {
                            final Map<String, Iterable<Integer>> byFirstLetter = c.sideInput(view);
                            final String firstLetter = c.element().substring(0, 1);
                            for (Integer number : byFirstLetter.get(firstLetter)) {
                                c.output(of(c.element(), number));
                            }
                        }
                    }).withSideInputs(view));

    PAssert.that(output).containsInAnyOrder(KV.of("apple", 1), KV.of("apple", 2), KV.of("banana", 3), KV.of("blackberry", 3));
    pipeline.run();
}
Also used : Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Map(java.util.Map) HashMap(java.util.HashMap) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 83 with KV

use of org.apache.beam.sdk.values.KV in project beam by apache.

the class ViewTest method testWindowedMapSideInput.

/**
 * Exercises a windowed map side input looked up by key.
 *
 * <p>Each main-input word is paired with the side-input value stored under the word's first
 * letter. Both inputs are timestamped and assigned to 10 ms fixed windows, and the emitted
 * pairs are verified with {@code PAssert}.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedMapSideInput() {
    // Side input: timestamped key/value pairs, windowed, then exposed as a map view.
    PCollection<KV<String, Integer>> sideElements =
            pipeline.apply(
                    "CreateSideInput",
                    Create.timestamped(
                            TimestampedValue.of(KV.of("a", 1), new Instant(1)),
                            TimestampedValue.of(KV.of("b", 2), new Instant(4)),
                            TimestampedValue.of(KV.of("b", 3), new Instant(18))));
    PCollection<KV<String, Integer>> windowedSide =
            sideElements.apply(
                    "SideWindowInto",
                    Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));
    final PCollectionView<Map<String, Integer>> view =
            windowedSide.apply(View.<String, Integer>asMap());

    // Main input: words whose first letter is the lookup key.
    PCollection<String> words =
            pipeline.apply(
                    "CreateMainInput",
                    Create.timestamped(
                            TimestampedValue.of("apple", new Instant(5)),
                            TimestampedValue.of("banana", new Instant(4)),
                            TimestampedValue.of("blackberry", new Instant(16))));
    PCollection<String> windowedWords =
            words.apply(
                    "MainWindowInto",
                    Window.<String>into(FixedWindows.of(Duration.millis(10))));

    PCollection<KV<String, Integer>> output =
            windowedWords.apply(
                    "OutputSideInputs",
                    ParDo.of(new DoFn<String, KV<String, Integer>>() {

                        @ProcessElement
                        public void processElement(ProcessContext c) {
                            final String word = c.element();
                            final Map<String, Integer> byFirstLetter = c.sideInput(view);
                            final Integer mapped = byFirstLetter.get(c.element().substring(0, 1));
                            c.output(KV.of(word, mapped));
                        }
                    }).withSideInputs(view));

    PAssert.that(output).containsInAnyOrder(KV.of("apple", 1), KV.of("banana", 2), KV.of("blackberry", 3));
    pipeline.run();
}
Also used : Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Map(java.util.Map) HashMap(java.util.HashMap) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 84 with KV

use of org.apache.beam.sdk.values.KV in project beam by apache.

the class ViewTest method testWindowedMapAsEntrySetSideInput.

/**
 * Exercises a windowed map side input that is read through {@code size()} and
 * {@code entrySet()}.
 *
 * <p>Each main-input integer is the number of entries the DoFn asserts it can see in the side
 * input; every entry found is re-emitted as a key/value pair and verified with
 * {@code PAssert}. Both inputs are timestamped and assigned to 10 ms fixed windows.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedMapAsEntrySetSideInput() {
    // Side input: timestamped key/value pairs, windowed, then exposed as a map view.
    PCollection<KV<String, Integer>> sideElements =
            pipeline.apply(
                    "CreateSideInput",
                    Create.timestamped(
                            TimestampedValue.of(KV.of("a", 1), new Instant(1)),
                            TimestampedValue.of(KV.of("b", 2), new Instant(4)),
                            TimestampedValue.of(KV.of("b", 3), new Instant(18))));
    PCollection<KV<String, Integer>> windowedSide =
            sideElements.apply(
                    "SideWindowInto",
                    Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));
    final PCollectionView<Map<String, Integer>> view =
            windowedSide.apply(View.<String, Integer>asMap());

    // Main input: each element is the entry count expected from the side input.
    PCollection<Integer> expectedSizes =
            pipeline.apply(
                    "CreateMainInput",
                    Create.timestamped(
                            TimestampedValue.of(2 /* size */, new Instant(5)),
                            TimestampedValue.of(1 /* size */, new Instant(16))));
    PCollection<Integer> windowedMain =
            expectedSizes.apply(
                    "MainWindowInto",
                    Window.<Integer>into(FixedWindows.of(Duration.millis(10))));

    PCollection<KV<String, Integer>> output =
            windowedMain.apply(
                    "OutputSideInputs",
                    ParDo.of(new DoFn<Integer, KV<String, Integer>>() {

                        @ProcessElement
                        public void processElement(ProcessContext c) {
                            // The map itself and its entry set must both report the expected size.
                            final int expectedMapSize = c.element();
                            final int actualMapSize = c.sideInput(view).size();
                            assertEquals(expectedMapSize, actualMapSize);

                            final int expectedEntryCount = c.element();
                            final int actualEntryCount = c.sideInput(view).entrySet().size();
                            assertEquals(expectedEntryCount, actualEntryCount);

                            // Re-emit every map entry as a key/value pair.
                            for (Entry<String, Integer> pair : c.sideInput(view).entrySet()) {
                                c.output(KV.of(pair.getKey(), pair.getValue()));
                            }
                        }
                    }).withSideInputs(view));

    PAssert.that(output).containsInAnyOrder(KV.of("a", 1), KV.of("b", 2), KV.of("b", 3));
    pipeline.run();
}
Also used : Entry(java.util.Map.Entry) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Map(java.util.Map) HashMap(java.util.HashMap) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 85 with KV

use of org.apache.beam.sdk.values.KV in project beam by apache.

the class SplittableDoFnTest method testLateData.

/**
 * Checks how late data flows through {@code PairStringWithIndexToLength} and a following
 * {@code GroupByKey}.
 *
 * <p>A {@code TestStream} emits "aa", advances the watermark by five seconds, then emits "bb"
 * stamped one hour before the base instant. The input uses one-minute fixed windows with one
 * minute of allowed lateness. The test asserts that:
 *
 * <ul>
 *   <li>the DoFn output contains pairs for both "aa" and "bb";
 *   <li>the DoFn output has the same windowing strategy as its input;
 *   <li>only "aa" remains as a key after the {@code GroupByKey}.
 * </ul>
 *
 * @throws Exception declared by the original test signature
 */
@Test
@Category({ ValidatesRunner.class, UsesSplittableParDo.class, UsesTestStream.class })
public void testLateData() throws Exception {
    Instant base = Instant.now();
    TestStream<String> stream =
            TestStream.create(StringUtf8Coder.of())
                    .advanceWatermarkTo(base)
                    .addElements("aa")
                    .advanceWatermarkTo(base.plus(Duration.standardSeconds(5)))
                    // "bb" is stamped an hour before base, i.e. behind the watermark set above.
                    .addElements(TimestampedValue.of("bb", base.minus(Duration.standardHours(1))))
                    .advanceProcessingTime(Duration.standardHours(1))
                    .advanceWatermarkToInfinity();
    PCollection<String> input =
            p.apply(stream)
                    .apply(Window.<String>into(FixedWindows.of(Duration.standardMinutes(1)))
                            .withAllowedLateness(Duration.standardMinutes(1))
                            .discardingFiredPanes());
    PCollection<KV<String, Integer>> afterSDF =
            input.apply(ParDo.of(new PairStringWithIndexToLength()))
                    .setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of()));
    PCollection<String> nonLate =
            afterSDF.apply(GroupByKey.<String, Integer>create())
                    .apply(Keys.<String>create());
    // The splittable DoFn itself should not drop any data and act as pass-through.
    PAssert.that(afterSDF).containsInAnyOrder(Arrays.asList(KV.of("aa", 0), KV.of("aa", 1), KV.of("bb", 0), KV.of("bb", 1)));
    // But it should preserve the windowing strategy of the data, including allowed lateness:
    // the follow-up GBK should drop the late data.
    // JUnit's contract is assertEquals(expected, actual): the input's strategy is the reference
    // value and the DoFn output's strategy is the value under test.
    assertEquals(input.getWindowingStrategy(), afterSDF.getWindowingStrategy());
    PAssert.that(nonLate).containsInAnyOrder("aa");
    p.run();
}
Also used : Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

KV (org.apache.beam.sdk.values.KV): 192; Test (org.junit.Test): 143; Instant (org.joda.time.Instant): 66; Category (org.junit.experimental.categories.Category): 62; Pipeline (org.apache.beam.sdk.Pipeline): 35; IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 34; StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString): 33; Matchers.containsString (org.hamcrest.Matchers.containsString): 33; StateSpec (org.apache.beam.sdk.state.StateSpec): 25; BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 22; ArrayList (java.util.ArrayList): 19; WindowedValue (org.apache.beam.sdk.util.WindowedValue): 19; TupleTag (org.apache.beam.sdk.values.TupleTag): 16; TableRow (com.google.api.services.bigquery.model.TableRow): 15; Map (java.util.Map): 15; ValueState (org.apache.beam.sdk.state.ValueState): 15; List (java.util.List): 14; ImmutableList (com.google.common.collect.ImmutableList): 12; HashMap (java.util.HashMap): 12; Timer (org.apache.beam.sdk.state.Timer): 12