Search in sources :

Example 1 with TestStream

use of org.apache.beam.sdk.testing.TestStream in project beam by apache.

The method testInStreamingMode of the class GroupIntoBatchesTest.

/**
 * Checks {@code GroupIntoBatches} when its input comes from a {@link TestStream}.
 *
 * <p>Each entry of {@code data} is stamped {@code timestampInterval} seconds after the previous
 * one, starting at instant 0, and assigned to fixed windows of {@code windowDuration} seconds.
 * The test then asserts on the number of batches emitted per window and on the size of each
 * batch.
 */
@Test
@Category({ NeedsRunner.class, UsesTimersInParDo.class, UsesTestStream.class, UsesStatefulParDo.class })
public void testInStreamingMode() {
    int timestampInterval = 1;
    Instant startInstant = new Instant(0L);
    TestStream.Builder<KV<String, String>> streamBuilder =
        TestStream.create(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))
            .advanceWatermarkTo(startInstant);
    // Give every element its own timestamp, timestampInterval seconds after the previous one.
    long offset = 0L;
    for (KV<String, String> element : data) {
        streamBuilder = streamBuilder.addElements(
            TimestampedValue.of(
                element,
                startInstant.plus(Duration.standardSeconds(offset * timestampInterval))));
        offset++;
    }
    final long windowDuration = 6;
    // Move the watermark to just before and just after the end of the first window, then to
    // NUM_ELEMENTS seconds, and finally to infinity.
    TestStream<KV<String, String>> stream = streamBuilder
        .advanceWatermarkTo(startInstant.plus(Duration.standardSeconds(windowDuration - 1)))
        .advanceWatermarkTo(startInstant.plus(Duration.standardSeconds(windowDuration + 1)))
        .advanceWatermarkTo(startInstant.plus(Duration.standardSeconds(NUM_ELEMENTS)))
        .advanceWatermarkToInfinity();
    PCollection<KV<String, String>> inputCollection = pipeline
        .apply(stream)
        .apply(Window.<KV<String, String>>into(FixedWindows.of(Duration.standardSeconds(windowDuration)))
            .withAllowedLateness(Duration.millis(ALLOWED_LATENESS)));
    // Debug-only step: logs each element with its timestamp and window.
    inputCollection.apply(ParDo.of(new DoFn<KV<String, String>, Void>() {

        @ProcessElement
        public void processElement(ProcessContext c, BoundedWindow window) {
            // Use '{}' for every argument: '%s' is not a placeholder for '{}'-style loggers, so
            // the timestamp and window would otherwise never appear in the message.
            LOG.debug("*** ELEMENT: ({},{}) *** with timestamp {} in window {}", c.element().getKey(), c.element().getValue(), c.timestamp(), window);
        }
    }));
    PCollection<KV<String, Iterable<String>>> outputCollection = inputCollection
        .apply(GroupIntoBatches.<String, String>ofSize(BATCH_SIZE))
        .setCoder(KvCoder.of(StringUtf8Coder.of(), IterableCoder.of(StringUtf8Coder.of())));
    // elements have the same key and collection is divided into windows,
    // so Count.perKey values are the number of elements in windows
    PCollection<KV<String, Long>> countOutput = outputCollection.apply("Count elements in windows after applying GroupIntoBatches", Count.<String, Iterable<String>>perKey());
    // NOTE(review): both checks below read results in iteration order (first window first);
    // confirm the runner under test yields them in that order.
    PAssert.that("Wrong number of elements in windows after GroupIntoBatches", countOutput).satisfies(new SerializableFunction<Iterable<KV<String, Long>>, Void>() {

        @Override
        public Void apply(Iterable<KV<String, Long>> input) {
            Iterator<KV<String, Long>> inputIterator = input.iterator();
            // first element
            long count0 = inputIterator.next().getValue();
            // window duration is 6 and batch size is 5, so there should be 2 elements in the
            // window (flush because batchSize reached and for end of window reached)
            assertEquals("Wrong number of elements in first window", 2, count0);
            // second element
            long count1 = inputIterator.next().getValue();
            // collection is 10 elements, there is only 4 elements left, so there should be only
            // one element in the window (flush because end of window/collection reached)
            assertEquals("Wrong number of elements in second window", 1, count1);
            return null;
        }
    });
    PAssert.that("Incorrect output collection after GroupIntoBatches", outputCollection).satisfies(new SerializableFunction<Iterable<KV<String, Iterable<String>>>, Void>() {

        @Override
        public Void apply(Iterable<KV<String, Iterable<String>>> input) {
            Iterator<KV<String, Iterable<String>>> inputIterator = input.iterator();
            // first element
            int size0 = Iterables.size(inputIterator.next().getValue());
            // window duration is 6 and batch size is 5, so output batch size should be 5
            // (flush because of batchSize reached)
            assertEquals("Wrong first element batch Size", 5, size0);
            // second element
            int size1 = Iterables.size(inputIterator.next().getValue());
            // there is only one element left in the window so batch size should be 1
            // (flush because of end of window reached)
            assertEquals("Wrong second element batch Size", 1, size1);
            // third element
            int size2 = Iterables.size(inputIterator.next().getValue());
            // collection is 10 elements, there is only 4 left, so batch size should be 4
            // (flush because end of collection reached)
            assertEquals("Wrong third element batch Size", 4, size2);
            return null;
        }
    });
    pipeline.run().waitUntilFinish();
}
Also used : Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) Iterator(java.util.Iterator) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) UsesTestStream(org.apache.beam.sdk.testing.UsesTestStream) TestStream(org.apache.beam.sdk.testing.TestStream) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 2 with TestStream

use of org.apache.beam.sdk.testing.TestStream in project beam by apache.

The method testTeamScoresObservablyLate of the class LeaderBoardTest.

/**
 * Late-but-allowed data: some elements show up behind the watermark once the watermark has
 * already passed the end of the window, yet still within the maximum allowed lateness. Those
 * elements are expected to surface in a late pane.
 */
@Test
public void testTeamScoresObservablyLate() {
    String blue = TestUser.BLUE_ONE.getTeam();
    String red = TestUser.RED_ONE.getTeam();
    BoundedWindow teamWindow = new IntervalWindow(baseTime, TEAM_WINDOW_DURATION);
    Instant latenessHorizon = baseTime.plus(ALLOWED_LATENESS).plus(TEAM_WINDOW_DURATION);

    TestStream<GameActionInfo> events =
        TestStream.create(AvroCoder.of(GameActionInfo.class))
            .advanceWatermarkTo(baseTime)
            .addElements(
                event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)),
                event(TestUser.BLUE_TWO, 5, Duration.standardMinutes(8)))
            .advanceProcessingTime(Duration.standardMinutes(10))
            .advanceWatermarkTo(baseTime.plus(Duration.standardMinutes(3)))
            .addElements(
                event(TestUser.RED_ONE, 3, Duration.standardMinutes(1)),
                event(TestUser.RED_ONE, 4, Duration.standardMinutes(2)),
                event(TestUser.BLUE_ONE, 3, Duration.standardMinutes(5)))
            .advanceWatermarkTo(latenessHorizon.minus(Duration.standardMinutes(1)))
            .addElements(
                event(TestUser.RED_TWO, 2, Duration.ZERO),
                event(TestUser.RED_TWO, 5, Duration.standardMinutes(1)),
                event(TestUser.RED_TWO, 3, Duration.standardMinutes(3)))
            .advanceProcessingTime(Duration.standardMinutes(12))
            .addElements(
                event(TestUser.RED_TWO, 9, Duration.standardMinutes(1)),
                event(TestUser.RED_TWO, 1, Duration.standardMinutes(3)))
            .advanceWatermarkToInfinity();

    PCollection<KV<String, Integer>> scores =
        p.apply(events).apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS));

    // Across every pane of the window, both final team totals must show up.
    SerializableFunction<Iterable<KV<String, Integer>>, Void> containsBothTotals = panes -> {
        assertThat(panes, hasItem(KV.of(blue, 11)));
        assertThat(panes, hasItem(KV.of(red, 27)));
        return null;
    };
    PAssert.that(scores).inWindow(teamWindow).satisfies(containsBothTotals);

    // The on-time pane carries the totals accumulated before the late elements were added.
    ImmutableMap<String, Integer> onTimeTotals =
        ImmutableMap.<String, Integer>builder().put(red, 7).put(blue, 11).build();
    PAssert.thatMap(scores).inOnTimePane(teamWindow).isEqualTo(onTimeTotals);

    // The blue team gets no final pane: every blue update was already reflected in an
    // earlier pane.
    PAssert.that(scores).inFinalPane(teamWindow).containsInAnyOrder(KV.of(red, 27));

    p.run().waitUntilFinish();
}
Also used : KV(org.apache.beam.sdk.values.KV) GameActionInfo(org.apache.beam.examples.complete.game.UserScore.GameActionInfo) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) PTransform(org.apache.beam.sdk.transforms.PTransform) Assert.assertThat(org.junit.Assert.assertThat) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) ImmutableMap(com.google.common.collect.ImmutableMap) PAssert(org.apache.beam.sdk.testing.PAssert) CalculateTeamScores(org.apache.beam.examples.complete.game.LeaderBoard.CalculateTeamScores) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) AvroCoder(org.apache.beam.sdk.coders.AvroCoder) Serializable(java.io.Serializable) Matchers.hasItem(org.hamcrest.Matchers.hasItem) Rule(org.junit.Rule) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) CalculateUserScores(org.apache.beam.examples.complete.game.LeaderBoard.CalculateUserScores) TestStream(org.apache.beam.sdk.testing.TestStream) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) GameActionInfo(org.apache.beam.examples.complete.game.UserScore.GameActionInfo) Instant(org.joda.time.Instant) CalculateTeamScores(org.apache.beam.examples.complete.game.LeaderBoard.CalculateTeamScores) KV(org.apache.beam.sdk.values.KV) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 3 with TestStream

use of org.apache.beam.sdk.testing.TestStream in project DataflowJavaSDK-examples by GoogleCloudPlatform.

The method testTeamScoresObservablyLate of the class LeaderBoardTest.

/**
 * Exercises observably late data: elements land behind the watermark after it has moved past
 * the end of the window, but before the allowed lateness runs out, so they are emitted in a
 * late pane.
 */
@Test
public void testTeamScoresObservablyLate() {
    Instant closeOfFirstWindow = baseTime.plus(ALLOWED_LATENESS).plus(TEAM_WINDOW_DURATION);

    // Assemble the stream one stage at a time.
    TestStream.Builder<GameActionInfo> timeline =
        TestStream.create(AvroCoder.of(GameActionInfo.class)).advanceWatermarkTo(baseTime);
    timeline = timeline.addElements(
        event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)),
        event(TestUser.BLUE_TWO, 5, Duration.standardMinutes(8)));
    timeline = timeline
        .advanceProcessingTime(Duration.standardMinutes(10))
        .advanceWatermarkTo(baseTime.plus(Duration.standardMinutes(3)));
    timeline = timeline.addElements(
        event(TestUser.RED_ONE, 3, Duration.standardMinutes(1)),
        event(TestUser.RED_ONE, 4, Duration.standardMinutes(2)),
        event(TestUser.BLUE_ONE, 3, Duration.standardMinutes(5)));
    timeline = timeline.advanceWatermarkTo(closeOfFirstWindow.minus(Duration.standardMinutes(1)));
    timeline = timeline.addElements(
        event(TestUser.RED_TWO, 2, Duration.ZERO),
        event(TestUser.RED_TWO, 5, Duration.standardMinutes(1)),
        event(TestUser.RED_TWO, 3, Duration.standardMinutes(3)));
    timeline = timeline.advanceProcessingTime(Duration.standardMinutes(12));
    timeline = timeline.addElements(
        event(TestUser.RED_TWO, 9, Duration.standardMinutes(1)),
        event(TestUser.RED_TWO, 1, Duration.standardMinutes(3)));
    TestStream<GameActionInfo> gameEvents = timeline.advanceWatermarkToInfinity();

    PCollection<KV<String, Integer>> totalsPerTeam = p
        .apply(gameEvents)
        .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS));

    BoundedWindow firstWindow = new IntervalWindow(baseTime, TEAM_WINDOW_DURATION);
    String teamBlue = TestUser.BLUE_ONE.getTeam();
    String teamRed = TestUser.RED_ONE.getTeam();

    // Taken over all panes, the window must contain each team's final total.
    PAssert.that(totalsPerTeam)
        .inWindow(firstWindow)
        .satisfies((SerializableFunction<Iterable<KV<String, Integer>>, Void>) results -> {
            assertThat(results, hasItem(KV.of(teamBlue, 11)));
            assertThat(results, hasItem(KV.of(teamRed, 27)));
            return null;
        });

    // Totals expected in the on-time pane.
    PAssert.thatMap(totalsPerTeam)
        .inOnTimePane(firstWindow)
        .isEqualTo(
            ImmutableMap.<String, Integer>builder()
                .put(teamRed, 7)
                .put(teamBlue, 11)
                .build());

    // Only the red team has a final pane; all blue updates were already accounted for in
    // earlier panes.
    PAssert.that(totalsPerTeam)
        .inFinalPane(firstWindow)
        .containsInAnyOrder(KV.of(teamRed, 27));

    p.run().waitUntilFinish();
}
Also used : GameActionInfo(com.google.cloud.dataflow.examples.complete.game.UserScore.GameActionInfo) KV(org.apache.beam.sdk.values.KV) Duration(org.joda.time.Duration) RunWith(org.junit.runner.RunWith) SerializableFunction(org.apache.beam.sdk.transforms.SerializableFunction) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) CalculateUserScores(com.google.cloud.dataflow.examples.complete.game.LeaderBoard.CalculateUserScores) PTransform(org.apache.beam.sdk.transforms.PTransform) Assert.assertThat(org.junit.Assert.assertThat) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) TimestampedValue(org.apache.beam.sdk.values.TimestampedValue) ImmutableMap(com.google.common.collect.ImmutableMap) PAssert(org.apache.beam.sdk.testing.PAssert) CalculateTeamScores(com.google.cloud.dataflow.examples.complete.game.LeaderBoard.CalculateTeamScores) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) PCollection(org.apache.beam.sdk.values.PCollection) AvroCoder(org.apache.beam.sdk.coders.AvroCoder) Serializable(java.io.Serializable) Matchers.hasItem(org.hamcrest.Matchers.hasItem) Rule(org.junit.Rule) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) Instant(org.joda.time.Instant) TestStream(org.apache.beam.sdk.testing.TestStream) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) GameActionInfo(com.google.cloud.dataflow.examples.complete.game.UserScore.GameActionInfo) Instant(org.joda.time.Instant) CalculateTeamScores(com.google.cloud.dataflow.examples.complete.game.LeaderBoard.CalculateTeamScores) KV(org.apache.beam.sdk.values.KV) BoundedWindow(org.apache.beam.sdk.transforms.windowing.BoundedWindow) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Aggregations

TestStream (org.apache.beam.sdk.testing.TestStream)3 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)3 KV (org.apache.beam.sdk.values.KV)3 Instant (org.joda.time.Instant)3 Test (org.junit.Test)3 ImmutableMap (com.google.common.collect.ImmutableMap)2 Serializable (java.io.Serializable)2 AvroCoder (org.apache.beam.sdk.coders.AvroCoder)2 PipelineOptionsFactory (org.apache.beam.sdk.options.PipelineOptionsFactory)2 PAssert (org.apache.beam.sdk.testing.PAssert)2 TestPipeline (org.apache.beam.sdk.testing.TestPipeline)2 PTransform (org.apache.beam.sdk.transforms.PTransform)2 SerializableFunction (org.apache.beam.sdk.transforms.SerializableFunction)2 GlobalWindow (org.apache.beam.sdk.transforms.windowing.GlobalWindow)2 IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow)2 PCollection (org.apache.beam.sdk.values.PCollection)2 TimestampedValue (org.apache.beam.sdk.values.TimestampedValue)2 Matchers.hasItem (org.hamcrest.Matchers.hasItem)2 Duration (org.joda.time.Duration)2 Assert.assertThat (org.junit.Assert.assertThat)2