Search in sources :

Example 96 with KV

use of org.apache.beam.sdk.values.KV in project DataflowJavaSDK-examples by GoogleCloudPlatform.

the class LeaderBoardTest method testTeamScoresSpeculative.

/**
 * Verifies the {@link CalculateTeamScores} {@link PTransform} for a stream in which every element
 * arrives on time and processing time is advanced between batches of elements, so that
 * speculative panes are expected in addition to the on-time pane.
 */
@Test
public void testTeamScoresSpeculative() {
    final String blue = TestUser.BLUE_ONE.getTeam();
    final String red = TestUser.RED_ONE.getTeam();
    final IntervalWindow teamWindow = new IntervalWindow(baseTime, TEAM_WINDOW_DURATION);

    // Elements are interleaved with processing-time advances; the watermark only moves at the
    // start (to baseTime) and at the very end (to infinity).
    TestStream<GameActionInfo> gameEvents =
        TestStream.create(AvroCoder.of(GameActionInfo.class))
            .advanceWatermarkTo(baseTime)
            .addElements(
                event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)),
                event(TestUser.BLUE_ONE, 2, Duration.standardMinutes(1)))
            .advanceProcessingTime(Duration.standardMinutes(10))
            .addElements(event(TestUser.RED_TWO, 5, Duration.standardMinutes(3)))
            .advanceProcessingTime(Duration.standardMinutes(12))
            .addElements(event(TestUser.BLUE_TWO, 3, Duration.standardSeconds(22)))
            .advanceProcessingTime(Duration.standardMinutes(10))
            .addElements(
                event(TestUser.RED_ONE, 4, Duration.standardMinutes(4)),
                event(TestUser.BLUE_TWO, 2, Duration.standardMinutes(2)))
            .advanceWatermarkToInfinity();

    PCollection<KV<String, Integer>> scoresByTeam =
        p.apply(gameEvents)
            .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS));

    // Across all panes, the window holds the speculative results as well as the on-time ones.
    PAssert.that(scoresByTeam)
        .inWindow(teamWindow)
        .containsInAnyOrder(
            // On-time blue pane.
            KV.of(blue, 10),
            // On-time red pane.
            KV.of(red, 9),
            // First blue speculative pane.
            KV.of(blue, 5),
            // Second blue speculative pane.
            KV.of(blue, 8),
            // Red speculative pane.
            KV.of(red, 5));

    // Restricting to the on-time pane leaves only the final totals per team.
    PAssert.that(scoresByTeam)
        .inOnTimePane(teamWindow)
        .containsInAnyOrder(KV.of(blue, 10), KV.of(red, 9));

    p.run().waitUntilFinish();
}
Also used : GameActionInfo(com.google.cloud.dataflow.examples.complete.game.UserScore.GameActionInfo) CalculateTeamScores(com.google.cloud.dataflow.examples.complete.game.LeaderBoard.CalculateTeamScores) KV(org.apache.beam.sdk.values.KV) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 97 with KV

use of org.apache.beam.sdk.values.KV in project DataflowJavaSDK-examples by GoogleCloudPlatform.

the class LeaderBoardTest method testTeamScoresOnTime.

/**
 * Verifies the {@link CalculateTeamScores} {@link PTransform} when every element arrives on time,
 * i.e. ahead of the watermark.
 */
@Test
public void testTeamScoresOnTime() {
    final String blue = TestUser.BLUE_ONE.getTeam();
    final String red = TestUser.RED_ONE.getTeam();
    final IntervalWindow teamWindow = new IntervalWindow(baseTime, TEAM_WINDOW_DURATION);

    // Two batches of elements, separated by a watermark advance to baseTime + 3 minutes.
    TestStream<GameActionInfo> gameEvents =
        TestStream.create(AvroCoder.of(GameActionInfo.class))
            .advanceWatermarkTo(baseTime)
            .addElements(
                event(TestUser.BLUE_ONE, 3, Duration.standardSeconds(3)),
                event(TestUser.BLUE_ONE, 2, Duration.standardMinutes(1)),
                event(TestUser.RED_TWO, 3, Duration.standardSeconds(22)),
                event(TestUser.BLUE_TWO, 5, Duration.standardMinutes(3)))
            .advanceWatermarkTo(baseTime.plus(Duration.standardMinutes(3)))
            .addElements(
                event(TestUser.RED_ONE, 1, Duration.standardMinutes(4)),
                event(TestUser.BLUE_ONE, 2, Duration.standardSeconds(270)))
            .advanceWatermarkToInfinity();

    PCollection<KV<String, Integer>> scoresByTeam =
        p.apply(gameEvents)
            .apply(new CalculateTeamScores(TEAM_WINDOW_DURATION, ALLOWED_LATENESS));

    // Expected on-time totals: blue 3 + 2 + 5 + 2 = 12, red 3 + 1 = 4.
    PAssert.that(scoresByTeam)
        .inOnTimePane(teamWindow)
        .containsInAnyOrder(KV.of(blue, 12), KV.of(red, 4));

    p.run().waitUntilFinish();
}
Also used : GameActionInfo(com.google.cloud.dataflow.examples.complete.game.UserScore.GameActionInfo) CalculateTeamScores(com.google.cloud.dataflow.examples.complete.game.LeaderBoard.CalculateTeamScores) KV(org.apache.beam.sdk.values.KV) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Example 98 with KV

use of org.apache.beam.sdk.values.KV in project DataflowJavaSDK-examples by GoogleCloudPlatform.

the class UserScoreTest method testTeamScoreSums.

/**
 * Runs the {@code GAME_EVENTS} fixture through {@code ParseEventFn} and
 * {@code ExtractAndSumScore("team")} and checks the result against {@code TEAM_SUMS}.
 */
@Test
@Category(ValidatesRunner.class)
public void testTeamScoreSums() throws Exception {
    PCollection<String> rawEvents =
        p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of()));

    // Parse each raw line, then sum the scores keyed by the "team" field.
    PCollection<KV<String, Integer>> teamTotals =
        rawEvents
            .apply(ParDo.of(new ParseEventFn()))
            .apply("ExtractTeamScore", new ExtractAndSumScore("team"));

    // The per-team sums must match the expected fixture, in any order.
    PAssert.that(teamTotals).containsInAnyOrder(TEAM_SUMS);

    p.run().waitUntilFinish();
}
Also used : ExtractAndSumScore(com.google.cloud.dataflow.examples.complete.game.UserScore.ExtractAndSumScore) KV(org.apache.beam.sdk.values.KV) ParseEventFn(com.google.cloud.dataflow.examples.complete.game.UserScore.ParseEventFn) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 99 with KV

use of org.apache.beam.sdk.values.KV in project DataflowJavaSDK-examples by GoogleCloudPlatform.

the class DebuggingWordCount method main.

/**
 * Builds and runs the debugging word-count pipeline: reads lines from the configured input file,
 * counts words, keeps only the words accepted by {@code FilterTextFn} for the configured filter
 * pattern, and asserts the filtered counts with {@code PAssert}.
 *
 * @param args command-line arguments, parsed and validated as {@code WordCountOptions}
 */
public static void main(String[] args) {
    WordCountOptions wordCountOptions =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(WordCountOptions.class);
    Pipeline pipeline = Pipeline.create(wordCountOptions);

    PCollection<KV<String, Long>> matchingWordCounts =
        pipeline
            .apply("ReadLines", TextIO.read().from(wordCountOptions.getInputFile()))
            .apply(new WordCount.CountWords())
            .apply(ParDo.of(new FilterTextFn(wordCountOptions.getFilterPattern())));

    // Concept #3: PAssert is a set of convenient PTransforms, in the style of Hamcrest's
    // collection matchers, for validating the contents of PCollections in Pipeline-level tests.
    // It is best suited to unit tests with small data sets; it is used here as a teaching tool.
    //
    // Below, the set of filtered words is checked against the expected counts. PAssert produces
    // no output: successful completion of the Pipeline implies the expectations were met. See
    // https://beam.apache.org/documentation/pipelines/test-your-pipeline/ for how to test a
    // Pipeline, and DebuggingWordCountTest for an example unit test.
    List<KV<String, Long>> expectedCounts =
        Arrays.asList(
            KV.of("Flourish", 3L),
            KV.of("stomach", 1L));
    PAssert.that(matchingWordCounts).containsInAnyOrder(expectedCounts);

    pipeline.run().waitUntilFinish();
}
Also used : KV(org.apache.beam.sdk.values.KV) Pipeline(org.apache.beam.sdk.Pipeline)

Example 100 with KV

use of org.apache.beam.sdk.values.KV in project DataflowJavaSDK-examples by GoogleCloudPlatform.

the class HourlyTeamScoreTest method testUserScoresFilter.

/**
 * Checks the start-time filter: only parsed events whose timestamp is strictly greater than a
 * fixed cutoff are kept, and the surviving (user, score) pairs must equal
 * {@code FILTERED_EVENTS}.
 */
@Test
@Category(ValidatesRunner.class)
public void testUserScoresFilter() throws Exception {
    // Cutoff instant, given in epoch milliseconds.
    final Instant cutoff = new Instant(1447965680000L);

    PCollection<String> rawEvents =
        p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of()));

    PCollection<KV<String, Integer>> userScores =
        rawEvents
            .apply("ParseGameEvent", ParDo.of(new ParseEventFn()))
            // Drop every event at or before the cutoff.
            .apply(
                "FilterStartTime",
                Filter.by((GameActionInfo info) -> info.getTimestamp() > cutoff.getMillis()))
            // Reduce each remaining event to its (user, score) pair.
            .apply(
                MapElements.into(
                        TypeDescriptors.kvs(
                            TypeDescriptors.strings(), TypeDescriptors.integers()))
                    .via((GameActionInfo info) -> KV.of(info.getUser(), info.getScore())));

    PAssert.that(userScores).containsInAnyOrder(FILTERED_EVENTS);

    p.run().waitUntilFinish();
}
Also used : GameActionInfo(com.google.cloud.dataflow.examples.complete.game.UserScore.GameActionInfo) Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) ParseEventFn(com.google.cloud.dataflow.examples.complete.game.UserScore.ParseEventFn) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Aggregations

KV (org.apache.beam.sdk.values.KV): 192; Test (org.junit.Test): 143; Instant (org.joda.time.Instant): 66; Category (org.junit.experimental.categories.Category): 62; Pipeline (org.apache.beam.sdk.Pipeline): 35; IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow): 34; StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString): 33; Matchers.containsString (org.hamcrest.Matchers.containsString): 33; StateSpec (org.apache.beam.sdk.state.StateSpec): 25; BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow): 22; ArrayList (java.util.ArrayList): 19; WindowedValue (org.apache.beam.sdk.util.WindowedValue): 19; TupleTag (org.apache.beam.sdk.values.TupleTag): 16; TableRow (com.google.api.services.bigquery.model.TableRow): 15; Map (java.util.Map): 15; ValueState (org.apache.beam.sdk.state.ValueState): 15; List (java.util.List): 14; ImmutableList (com.google.common.collect.ImmutableList): 12; HashMap (java.util.HashMap): 12; Timer (org.apache.beam.sdk.state.Timer): 12