
Example 71 with KV

Use of org.apache.beam.sdk.values.KV in project beam by apache.

From the class UserScoreTest, method testTeamScoreSums.

/** Tests ExtractAndSumScore("team"). */
@Test
@Category(ValidatesRunner.class)
public void testTeamScoreSums() throws Exception {
    PCollection<String> input = p.apply(Create.of(GAME_EVENTS).withCoder(StringUtf8Coder.of()));
    PCollection<KV<String, Integer>> output =
        input
            .apply(ParDo.of(new ParseEventFn()))
            .apply("ExtractTeamScore", new ExtractAndSumScore("team"));
    // Check the team score sums.
    PAssert.that(output).containsInAnyOrder(TEAM_SUMS);
    p.run().waitUntilFinish();
}
Also used : ExtractAndSumScore(org.apache.beam.examples.complete.game.UserScore.ExtractAndSumScore) KV(org.apache.beam.sdk.values.KV) ParseEventFn(org.apache.beam.examples.complete.game.UserScore.ParseEventFn) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)
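For context, ExtractAndSumScore is a small composite transform: it keys each parsed GameActionInfo by the requested field ("team" here) and sums the scores per key, which is what yields the KV<String, Integer> team totals asserted above. A minimal sketch of such a composite follows; the field accessor and constructor details are illustrative rather than the exact UserScore source.

public static class ExtractAndSumScore
    extends PTransform<PCollection<GameActionInfo>, PCollection<KV<String, Integer>>> {

    private final String field;

    ExtractAndSumScore(String field) {
        this.field = field;
    }

    @Override
    public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> gameInfo) {
        // Key each event by the chosen field (e.g. "team"), then sum scores per key.
        return gameInfo
            .apply(MapElements
                .into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
                .via((GameActionInfo gInfo) -> KV.of(gInfo.getKey(field), gInfo.getScore())))
            .apply(Sum.integersPerKey());
    }
}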

Example 72 with KV

Use of org.apache.beam.sdk.values.KV in project beam by apache.

From the class UserScoreTest, method testUserScoresBadInput.

/** Test that bad input data is dropped appropriately. */
@Test
@Category(ValidatesRunner.class)
public void testUserScoresBadInput() throws Exception {
    PCollection<String> input = p.apply(Create.of(GAME_EVENTS2).withCoder(StringUtf8Coder.of()));
    PCollection<KV<String, Integer>> extract =
        input
            .apply(ParDo.of(new ParseEventFn()))
            .apply(MapElements
                .into(TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
                .via((GameActionInfo gInfo) -> KV.of(gInfo.getUser(), gInfo.getScore())));
    PAssert.that(extract).empty();
    p.run().waitUntilFinish();
}
Also used : GameActionInfo(org.apache.beam.examples.complete.game.UserScore.GameActionInfo) KV(org.apache.beam.sdk.values.KV) ParseEventFn(org.apache.beam.examples.complete.game.UserScore.ParseEventFn) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)
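The empty() assertion works because the parse step drops malformed records instead of failing the pipeline. A ParseEventFn-style DoFn typically wraps the per-line CSV parse in a try/catch, counts the bad line, and emits nothing for it; the sketch below is hedged, with the field order and GameActionInfo constructor treated as illustrative.

static class ParseEventFn extends DoFn<String, GameActionInfo> {

    // Counter for lines that fail to parse; bad lines are skipped, not re-thrown.
    private final Counter numParseErrors = Metrics.counter("main", "ParseErrors");

    @ProcessElement
    public void processElement(ProcessContext c) {
        String[] parts = c.element().split(",", -1);
        try {
            String user = parts[0].trim();
            String team = parts[1].trim();
            Integer score = Integer.parseInt(parts[2].trim());
            Long timestamp = Long.parseLong(parts[3].trim());
            c.output(new GameActionInfo(user, team, score, timestamp));
        } catch (ArrayIndexOutOfBoundsException | NumberFormatException e) {
            numParseErrors.inc();
            // Dropping the element here is what lets PAssert.that(extract).empty() pass
            // when every line in GAME_EVENTS2 is malformed.
        }
    }
}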

Example 73 with KV

Use of org.apache.beam.sdk.values.KV in project beam by apache.

From the class WordCountTest, method testWindowedWordCount.

@Test
public void testWindowedWordCount() throws Exception {
    String[] args = new String[] { "--runner=" + ApexRunner.class.getName() };
    ApexPipelineOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(ApexPipelineOptions.class);
    options.setApplicationName("StreamingWordCount");
    Pipeline p = Pipeline.create(options);
    PCollection<KV<String, Long>> wordCounts =
        p.apply(Read.from(new UnboundedTextSource()))
            .apply(ParDo.of(new ExtractWordsFn()))
            .apply(Window.<String>into(FixedWindows.of(Duration.standardSeconds(10))))
            .apply(Count.<String>perElement());
    wordCounts.apply(ParDo.of(new CollectResultsFn()));
    ApexRunnerResult result = (ApexRunnerResult) p.run();
    Assert.assertNotNull(result.getApexDAG().getOperatorMeta("Read(UnboundedTextSource)"));
    long timeout = System.currentTimeMillis() + 30000;
    while (System.currentTimeMillis() < timeout) {
        if (CollectResultsFn.RESULTS.containsKey("foo") && CollectResultsFn.RESULTS.containsKey("bar")) {
            break;
        }
        result.waitUntilFinish(Duration.millis(1000));
    }
    result.cancel();
    Assert.assertTrue(CollectResultsFn.RESULTS.containsKey("foo") && CollectResultsFn.RESULTS.containsKey("bar"));
    CollectResultsFn.RESULTS.clear();
}
Also used : ApexRunner(org.apache.beam.runners.apex.ApexRunner) TestApexRunner(org.apache.beam.runners.apex.TestApexRunner) ApexRunnerResult(org.apache.beam.runners.apex.ApexRunnerResult) KV(org.apache.beam.sdk.values.KV) ApexPipelineOptions(org.apache.beam.runners.apex.ApexPipelineOptions) TestPipeline(org.apache.beam.sdk.testing.TestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) Test(org.junit.Test)
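Because the source here is unbounded, the test polls a static results map and then cancels the pipeline rather than asserting on the PCollection directly. On a bounded input, the same Count.perElement() stage (which turns a PCollection<String> into per-element KV<String, Long> counts) can be checked with PAssert; a minimal sketch with illustrative data:

    // Bounded-input sketch of the counting stage, checked with PAssert instead of
    // polling CollectResultsFn.RESULTS and cancelling the job.
    PCollection<KV<String, Long>> counts =
        p.apply(Create.of("foo", "bar", "foo"))
            .apply(Count.<String>perElement());
    PAssert.that(counts).containsInAnyOrder(KV.of("foo", 2L), KV.of("bar", 1L));
    p.run().waitUntilFinish();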

Example 74 with KV

Use of org.apache.beam.sdk.values.KV in project beam by apache.

From the class TopWikipediaSessionsITCase, method testProgram.

@Override
protected void testProgram() throws Exception {
    Pipeline p = FlinkTestPipeline.createForStreaming();
    Long now = (System.currentTimeMillis() + 10000) / 1000;
    PCollection<KV<String, Long>> output =
        p.apply(Create.of(Arrays.asList(
                new TableRow().set("timestamp", now).set("contributor_username", "user1"),
                new TableRow().set("timestamp", now + 10).set("contributor_username", "user3"),
                new TableRow().set("timestamp", now).set("contributor_username", "user2"),
                new TableRow().set("timestamp", now).set("contributor_username", "user1"),
                new TableRow().set("timestamp", now + 2).set("contributor_username", "user1"),
                new TableRow().set("timestamp", now).set("contributor_username", "user2"),
                new TableRow().set("timestamp", now + 1).set("contributor_username", "user2"),
                new TableRow().set("timestamp", now + 5).set("contributor_username", "user2"),
                new TableRow().set("timestamp", now + 7).set("contributor_username", "user2"),
                new TableRow().set("timestamp", now + 8).set("contributor_username", "user2"),
                new TableRow().set("timestamp", now + 200).set("contributor_username", "user2"),
                new TableRow().set("timestamp", now + 230).set("contributor_username", "user1"),
                new TableRow().set("timestamp", now + 230).set("contributor_username", "user2"),
                new TableRow().set("timestamp", now + 240).set("contributor_username", "user2"),
                new TableRow().set("timestamp", now + 245).set("contributor_username", "user3"),
                new TableRow().set("timestamp", now + 235).set("contributor_username", "user3"),
                new TableRow().set("timestamp", now + 236).set("contributor_username", "user3"),
                new TableRow().set("timestamp", now + 237).set("contributor_username", "user3"),
                new TableRow().set("timestamp", now + 238).set("contributor_username", "user3"),
                new TableRow().set("timestamp", now + 239).set("contributor_username", "user3"),
                new TableRow().set("timestamp", now + 240).set("contributor_username", "user3"),
                new TableRow().set("timestamp", now + 241).set("contributor_username", "user2"),
                new TableRow().set("timestamp", now).set("contributor_username", "user3"))))
            .apply(ParDo.of(new DoFn<TableRow, String>() {

        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
            TableRow row = c.element();
            long timestamp = (Integer) row.get("timestamp");
            String userName = (String) row.get("contributor_username");
            if (userName != null) {
                // Sets the timestamp field to be used in windowing.
                c.outputWithTimestamp(userName, new Instant(timestamp * 1000L));
            }
        }
    }))
        .apply(Window.<String>into(Sessions.withGapDuration(Duration.standardMinutes(1))))
        .apply(Count.<String>perElement());
    PCollection<String> format = output.apply(ParDo.of(new DoFn<KV<String, Long>, String>() {

        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
            KV<String, Long> el = c.element();
            String out = "user: " + el.getKey() + " value:" + el.getValue();
            c.output(out);
        }
    }));
    format.apply(TextIO.write().to(resultPath));
    p.run();
}
Also used : Instant(org.joda.time.Instant) KV(org.apache.beam.sdk.values.KV) FlinkTestPipeline(org.apache.beam.runners.flink.FlinkTestPipeline) Pipeline(org.apache.beam.sdk.Pipeline) DoFn(org.apache.beam.sdk.transforms.DoFn) TableRow(com.google.api.services.bigquery.model.TableRow)
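The Sessions window in this pipeline starts a new session for a user whenever more than one minute passes between that user's edits, so Count.perElement() emits one KV<String, Long> per user per session rather than a single global count. A sketch of just that stage is shown below; timestampedUserNames stands in for a PCollection<String> whose elements already carry event timestamps (an assumed name, not part of the test above).

    // Elements of the same user more than one minute apart fall into separate
    // session windows and are therefore counted separately.
    PCollection<KV<String, Long>> perUserSessionCounts =
        timestampedUserNames
            .apply(Window.<String>into(Sessions.withGapDuration(Duration.standardMinutes(1))))
            .apply(Count.<String>perElement());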

Example 75 with KV

Use of org.apache.beam.sdk.values.KV in project beam by apache.

From the class BatchViewOverridesTest, method testToIsmRecordForMapLikeDoFn.

@Test
public void testToIsmRecordForMapLikeDoFn() throws Exception {
    TupleTag<KV<Integer, KV<IntervalWindow, Long>>> outputForSizeTag = new TupleTag<>();
    TupleTag<KV<Integer, KV<IntervalWindow, Long>>> outputForEntrySetTag = new TupleTag<>();
    Coder<Long> keyCoder = VarLongCoder.of();
    Coder<IntervalWindow> windowCoder = IntervalWindow.getCoder();
    IsmRecordCoder<WindowedValue<Long>> ismCoder =
        IsmRecordCoder.of(
            1,
            2,
            ImmutableList.<Coder<?>>of(MetadataKeyCoder.of(keyCoder), IntervalWindow.getCoder(), BigEndianLongCoder.of()),
            FullWindowedValueCoder.of(VarLongCoder.of(), windowCoder));
    DoFnTester<KV<Integer, Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>>>, IsmRecord<WindowedValue<Long>>> doFnTester =
        DoFnTester.of(
            new BatchViewOverrides.BatchViewAsMultimap.ToIsmRecordForMapLikeDoFn<>(
                outputForSizeTag, outputForEntrySetTag, windowCoder, keyCoder, ismCoder, false));
    IntervalWindow windowA = new IntervalWindow(new Instant(0), new Instant(10));
    IntervalWindow windowB = new IntervalWindow(new Instant(10), new Instant(20));
    IntervalWindow windowC = new IntervalWindow(new Instant(20), new Instant(30));
    Iterable<KV<Integer, Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>>>> inputElements =
        ImmutableList.of(
            KV.of(
                1,
                (Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>>) ImmutableList.of(
                    KV.of(KV.of(1L, windowA), WindowedValue.of(110L, new Instant(1), windowA, PaneInfo.NO_FIRING)),
                    // same window same key as to previous
                    KV.of(KV.of(1L, windowA), WindowedValue.of(111L, new Instant(2), windowA, PaneInfo.NO_FIRING)),
                    // same window different key as to previous
                    KV.of(KV.of(2L, windowA), WindowedValue.of(120L, new Instant(3), windowA, PaneInfo.NO_FIRING)),
                    // different window same key as to previous
                    KV.of(KV.of(2L, windowB), WindowedValue.of(210L, new Instant(11), windowB, PaneInfo.NO_FIRING)),
                    // different window and different key as to previous
                    KV.of(KV.of(3L, windowB), WindowedValue.of(220L, new Instant(12), windowB, PaneInfo.NO_FIRING)))),
            KV.of(
                2,
                (Iterable<KV<KV<Long, IntervalWindow>, WindowedValue<Long>>>) ImmutableList.of(
                    // different shard
                    KV.of(KV.of(4L, windowC), WindowedValue.of(330L, new Instant(21), windowC, PaneInfo.NO_FIRING)))));
    // The order of the output elements is important relative to processing order
    assertThat(doFnTester.processBundle(inputElements), contains(IsmRecord.of(ImmutableList.of(1L, windowA, 0L), WindowedValue.of(110L, new Instant(1), windowA, PaneInfo.NO_FIRING)), IsmRecord.of(ImmutableList.of(1L, windowA, 1L), WindowedValue.of(111L, new Instant(2), windowA, PaneInfo.NO_FIRING)), IsmRecord.of(ImmutableList.of(2L, windowA, 0L), WindowedValue.of(120L, new Instant(3), windowA, PaneInfo.NO_FIRING)), IsmRecord.of(ImmutableList.of(2L, windowB, 0L), WindowedValue.of(210L, new Instant(11), windowB, PaneInfo.NO_FIRING)), IsmRecord.of(ImmutableList.of(3L, windowB, 0L), WindowedValue.of(220L, new Instant(12), windowB, PaneInfo.NO_FIRING)), IsmRecord.of(ImmutableList.of(4L, windowC, 0L), WindowedValue.of(330L, new Instant(21), windowC, PaneInfo.NO_FIRING))));
    // Verify the number of unique keys per window.
    assertThat(doFnTester.takeOutputElements(outputForSizeTag), contains(KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowA)), KV.of(windowA, 2L)), KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowB)), KV.of(windowB, 2L)), KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowC)), KV.of(windowC, 1L))));
    // Verify the output for the unique keys.
    assertThat(doFnTester.takeOutputElements(outputForEntrySetTag), contains(KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowA)), KV.of(windowA, 1L)), KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowA)), KV.of(windowA, 2L)), KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowB)), KV.of(windowB, 2L)), KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowB)), KV.of(windowB, 3L)), KV.of(ismCoder.hash(ImmutableList.of(IsmFormat.getMetadataKey(), windowC)), KV.of(windowC, 4L))));
}
Also used : Instant(org.joda.time.Instant) TupleTag(org.apache.beam.sdk.values.TupleTag) IsmRecord(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord) KV(org.apache.beam.sdk.values.KV) WindowedValue(org.apache.beam.sdk.util.WindowedValue) IntervalWindow(org.apache.beam.sdk.transforms.windowing.IntervalWindow) Test(org.junit.Test)

Aggregations

KV (org.apache.beam.sdk.values.KV) 192
Test (org.junit.Test) 143
Instant (org.joda.time.Instant) 66
Category (org.junit.experimental.categories.Category) 62
Pipeline (org.apache.beam.sdk.Pipeline) 35
IntervalWindow (org.apache.beam.sdk.transforms.windowing.IntervalWindow) 34
StringUtils.byteArrayToJsonString (org.apache.beam.sdk.util.StringUtils.byteArrayToJsonString) 33
Matchers.containsString (org.hamcrest.Matchers.containsString) 33
StateSpec (org.apache.beam.sdk.state.StateSpec) 25
BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow) 22
ArrayList (java.util.ArrayList) 19
WindowedValue (org.apache.beam.sdk.util.WindowedValue) 19
TupleTag (org.apache.beam.sdk.values.TupleTag) 16
TableRow (com.google.api.services.bigquery.model.TableRow) 15
Map (java.util.Map) 15
ValueState (org.apache.beam.sdk.state.ValueState) 15
List (java.util.List) 14
ImmutableList (com.google.common.collect.ImmutableList) 12
HashMap (java.util.HashMap) 12
Timer (org.apache.beam.sdk.state.Timer) 12