use of org.apache.beam.sdk.values.KV in project beam by apache.
the class ViewTest method testWindowedMultimapAsEntrySetSideInput.
/**
 * Checks that a windowed multimap side input can be inspected through {@code entrySet()}.
 *
 * <p>The side-input elements are timestamped at 1, 7 and 14 and assigned to 10 ms fixed windows.
 * Each main-input element carries the number of keys it expects to find in the side input it
 * reads; the DoFn asserts that count and then re-emits every (key, value) pair it sees.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedMultimapAsEntrySetSideInput() {
  // Build the side input step by step: create, window, then materialize as a multimap view.
  PCollection<KV<String, Integer>> sideElements =
      pipeline.apply(
          "CreateSideInput",
          Create.timestamped(
              TimestampedValue.of(KV.of("a", 1), new Instant(1)),
              TimestampedValue.of(KV.of("a", 2), new Instant(7)),
              TimestampedValue.of(KV.of("b", 3), new Instant(14))));
  PCollection<KV<String, Integer>> windowedSideElements =
      sideElements.apply(
          "SideWindowInto",
          Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));
  final PCollectionView<Map<String, Iterable<Integer>>> view =
      windowedSideElements.apply(View.<String, Integer>asMultimap());

  // Each main-input value is the expected number of keys in the side input.
  PCollection<Integer> expectedSizes =
      pipeline.apply(
          "CreateMainInput",
          Create.timestamped(
              TimestampedValue.of(1, new Instant(5)),
              TimestampedValue.of(1, new Instant(16))));
  PCollection<Integer> windowedExpectedSizes =
      expectedSizes.apply(
          "MainWindowInto", Window.<Integer>into(FixedWindows.of(Duration.millis(10))));

  DoFn<Integer, KV<String, Integer>> emitEntries =
      new DoFn<Integer, KV<String, Integer>>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          Map<String, Iterable<Integer>> multimap = c.sideInput(view);
          int expectedSize = c.element();
          assertEquals(expectedSize, multimap.size());
          assertEquals(expectedSize, multimap.entrySet().size());
          // Flatten the multimap back into individual key/value pairs.
          for (Entry<String, Iterable<Integer>> keyed : multimap.entrySet()) {
            String key = keyed.getKey();
            for (Integer item : keyed.getValue()) {
              c.output(KV.of(key, item));
            }
          }
        }
      };

  PCollection<KV<String, Integer>> output =
      windowedExpectedSizes.apply(
          "OutputSideInputs", ParDo.of(emitEntries).withSideInputs(view));

  PAssert.that(output).containsInAnyOrder(KV.of("a", 1), KV.of("a", 2), KV.of("b", 3));
  pipeline.run();
}
use of org.apache.beam.sdk.values.KV in project beam by apache.
the class ViewTest method testWindowedMultimapSideInputWithNonDeterministicKeyCoder.
/**
 * Checks that a windowed multimap side input can be looked up by key when the key coder is a
 * {@code NonDeterministicStringCoder}.
 *
 * <p>The side-input elements are timestamped at 1, 7 and 14 and assigned to 10 ms fixed windows.
 * Each main-input word looks up its first letter in the side input and emits one
 * {@code (word, value)} pair per value found under that key.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedMultimapSideInputWithNonDeterministicKeyCoder() {
  final PCollectionView<Map<String, Iterable<Integer>>> view =
      pipeline
          .apply(
              "CreateSideInput",
              Create.timestamped(
                      TimestampedValue.of(KV.of("a", 1), new Instant(1)),
                      TimestampedValue.of(KV.of("a", 2), new Instant(7)),
                      TimestampedValue.of(KV.of("b", 3), new Instant(14)))
                  .withCoder(KvCoder.of(new NonDeterministicStringCoder(), VarIntCoder.of())))
          .apply(
              "SideWindowInto",
              Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))))
          .apply(View.<String, Integer>asMultimap());

  PCollection<KV<String, Integer>> output =
      pipeline
          .apply(
              "CreateMainInput",
              Create.timestamped(
                  TimestampedValue.of("apple", new Instant(5)),
                  TimestampedValue.of("banana", new Instant(13)),
                  TimestampedValue.of("blackberry", new Instant(16))))
          .apply("MainWindowInto", Window.<String>into(FixedWindows.of(Duration.millis(10))))
          .apply(
              "OutputSideInputs",
              ParDo.of(
                      new DoFn<String, KV<String, Integer>>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          // The side input is keyed by the first letter of the word.
                          for (Integer v : c.sideInput(view).get(c.element().substring(0, 1))) {
                            // Qualified KV.of, matching the rest of the file; a bare of(...)
                            // would depend on a static import and is easy to confuse with
                            // the other of(...) factories used in this method.
                            c.output(KV.of(c.element(), v));
                          }
                        }
                      })
                  .withSideInputs(view));

  PAssert.that(output)
      .containsInAnyOrder(
          KV.of("apple", 1), KV.of("apple", 2), KV.of("banana", 3), KV.of("blackberry", 3));
  pipeline.run();
}
use of org.apache.beam.sdk.values.KV in project beam by apache.
the class ViewTest method testWindowedMapSideInput.
/**
 * Checks that a windowed map side input can be looked up by key.
 *
 * <p>The side-input elements are timestamped at 1, 4 and 18 and assigned to 10 ms fixed windows.
 * Each main-input word looks up its first letter in the side input and emits
 * {@code (word, value)}.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedMapSideInput() {
  // Build the side input step by step: create, window, then materialize as a map view.
  PCollection<KV<String, Integer>> sideElements =
      pipeline.apply(
          "CreateSideInput",
          Create.timestamped(
              TimestampedValue.of(KV.of("a", 1), new Instant(1)),
              TimestampedValue.of(KV.of("b", 2), new Instant(4)),
              TimestampedValue.of(KV.of("b", 3), new Instant(18))));
  PCollection<KV<String, Integer>> windowedSideElements =
      sideElements.apply(
          "SideWindowInto",
          Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));
  final PCollectionView<Map<String, Integer>> view =
      windowedSideElements.apply(View.<String, Integer>asMap());

  PCollection<String> words =
      pipeline.apply(
          "CreateMainInput",
          Create.timestamped(
              TimestampedValue.of("apple", new Instant(5)),
              TimestampedValue.of("banana", new Instant(4)),
              TimestampedValue.of("blackberry", new Instant(16))));
  PCollection<String> windowedWords =
      words.apply("MainWindowInto", Window.<String>into(FixedWindows.of(Duration.millis(10))));

  DoFn<String, KV<String, Integer>> lookUpFirstLetter =
      new DoFn<String, KV<String, Integer>>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          String word = c.element();
          // The side input is keyed by the first letter of the word.
          String firstLetter = word.substring(0, 1);
          Integer mapped = c.sideInput(view).get(firstLetter);
          c.output(KV.of(word, mapped));
        }
      };

  PCollection<KV<String, Integer>> output =
      windowedWords.apply(
          "OutputSideInputs", ParDo.of(lookUpFirstLetter).withSideInputs(view));

  PAssert.that(output)
      .containsInAnyOrder(KV.of("apple", 1), KV.of("banana", 2), KV.of("blackberry", 3));
  pipeline.run();
}
use of org.apache.beam.sdk.values.KV in project beam by apache.
the class ViewTest method testWindowedMapAsEntrySetSideInput.
/**
 * Checks that a windowed map side input can be inspected through {@code entrySet()}.
 *
 * <p>The side-input elements are timestamped at 1, 4 and 18 and assigned to 10 ms fixed windows.
 * Each main-input element carries the number of entries it expects to find in the side input
 * it reads; the DoFn asserts that count and then re-emits every entry it sees.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedMapAsEntrySetSideInput() {
  // Build the side input step by step: create, window, then materialize as a map view.
  PCollection<KV<String, Integer>> sideElements =
      pipeline.apply(
          "CreateSideInput",
          Create.timestamped(
              TimestampedValue.of(KV.of("a", 1), new Instant(1)),
              TimestampedValue.of(KV.of("b", 2), new Instant(4)),
              TimestampedValue.of(KV.of("b", 3), new Instant(18))));
  PCollection<KV<String, Integer>> windowedSideElements =
      sideElements.apply(
          "SideWindowInto",
          Window.<KV<String, Integer>>into(FixedWindows.of(Duration.millis(10))));
  final PCollectionView<Map<String, Integer>> view =
      windowedSideElements.apply(View.<String, Integer>asMap());

  // Each main-input value is the expected number of entries in the side input.
  PCollection<Integer> expectedSizes =
      pipeline.apply(
          "CreateMainInput",
          Create.timestamped(
              TimestampedValue.of(2, new Instant(5)),
              TimestampedValue.of(1, new Instant(16))));
  PCollection<Integer> windowedExpectedSizes =
      expectedSizes.apply(
          "MainWindowInto", Window.<Integer>into(FixedWindows.of(Duration.millis(10))));

  DoFn<Integer, KV<String, Integer>> emitEntries =
      new DoFn<Integer, KV<String, Integer>>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          Map<String, Integer> sideMap = c.sideInput(view);
          int expectedSize = c.element();
          assertEquals(expectedSize, sideMap.size());
          assertEquals(expectedSize, sideMap.entrySet().size());
          for (Entry<String, Integer> pair : sideMap.entrySet()) {
            c.output(KV.of(pair.getKey(), pair.getValue()));
          }
        }
      };

  PCollection<KV<String, Integer>> output =
      windowedExpectedSizes.apply(
          "OutputSideInputs", ParDo.of(emitEntries).withSideInputs(view));

  PAssert.that(output).containsInAnyOrder(KV.of("a", 1), KV.of("b", 2), KV.of("b", 3));
  pipeline.run();
}
use of org.apache.beam.sdk.values.KV in project beam by apache.
the class SplittableDoFnTest method testLateData.
/**
 * Checks how {@code PairStringWithIndexToLength} treats late data.
 *
 * <p>The stream emits "aa" with the watermark at {@code base}, then advances the watermark by
 * five seconds and emits "bb" timestamped one hour before {@code base}. The input is windowed
 * into one-minute fixed windows with one minute of allowed lateness. The test asserts that the
 * DoFn output still contains the "bb" pairs and keeps the input's windowing strategy, and that
 * only "aa" survives the follow-up GroupByKey.
 *
 * @throws Exception declared by the test signature; nothing in the body handles it explicitly
 */
@Test
@Category({ValidatesRunner.class, UsesSplittableParDo.class, UsesTestStream.class})
public void testLateData() throws Exception {
  Instant base = Instant.now();

  TestStream<String> stream =
      TestStream.create(StringUtf8Coder.of())
          .advanceWatermarkTo(base)
          .addElements("aa")
          .advanceWatermarkTo(base.plus(Duration.standardSeconds(5)))
          // "bb" is timestamped an hour behind base, i.e. well behind the watermark.
          .addElements(TimestampedValue.of("bb", base.minus(Duration.standardHours(1))))
          .advanceProcessingTime(Duration.standardHours(1))
          .advanceWatermarkToInfinity();

  PCollection<String> input =
      p.apply(stream)
          .apply(
              Window.<String>into(FixedWindows.of(Duration.standardMinutes(1)))
                  .withAllowedLateness(Duration.standardMinutes(1))
                  .discardingFiredPanes());

  PCollection<KV<String, Integer>> afterSDF =
      input
          .apply(ParDo.of(new PairStringWithIndexToLength()))
          .setCoder(KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of()));

  PCollection<String> nonLate =
      afterSDF.apply(GroupByKey.<String, Integer>create()).apply(Keys.<String>create());

  // The splittable DoFn itself should not drop any data and act as pass-through.
  PAssert.that(afterSDF)
      .containsInAnyOrder(
          Arrays.asList(KV.of("aa", 0), KV.of("aa", 1), KV.of("bb", 0), KV.of("bb", 1)));

  // But it should preserve the windowing strategy of the data, including allowed lateness:
  // the follow-up GBK should drop the late data.
  // Expected value first (the input's strategy), value under test second, so that a failure
  // message labels the two sides correctly.
  assertEquals(input.getWindowingStrategy(), afterSDF.getWindowingStrategy());
  PAssert.that(nonLate).containsInAnyOrder("aa");
  p.run();
}
Aggregations