Search in sources:

Example 11 with GlobalWindows

use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in project beam by apache.

the class SimpleDoFnRunnerTest method testTimerSet.

/**
 * Verifies that a timer set from user code is forwarded to the timer internals. How the timer is
 * ultimately registered is left to the runner and its step context.
 */
@Test
public void testTimerSet() {
    WindowFn<?, ?> globalWindowFn = new GlobalWindows();
    DoFnWithTimers<GlobalWindow> timerFn = new DoFnWithTimers(globalWindowFn.windowCoder());
    DoFnRunner<String, String> doFnRunner =
        new SimpleDoFnRunner<>(
            null,
            timerFn,
            NullSideInputReader.empty(),
            null,
            null,
            Collections.<TupleTag<?>>emptyList(),
            mockStepContext,
            WindowingStrategy.of(new GlobalWindows()));

    // Stub the input watermark; the expected firing time below is derived from this value.
    Instant stubbedWatermark = new Instant(42);
    when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(stubbedWatermark);

    doFnRunner.processElement(WindowedValue.valueInGlobalWindow("anyValue"));

    Instant expectedFiringTime = stubbedWatermark.plus(DoFnWithTimers.TIMER_OFFSET);
    verify(mockTimerInternals)
        .setTimer(
            StateNamespaces.window(new GlobalWindows().windowCoder(), GlobalWindow.INSTANCE),
            DoFnWithTimers.TIMER_ID,
            expectedFiringTime,
            TimeDomain.EVENT_TIME);
}
Also used : GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) Instant(org.joda.time.Instant) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) Test(org.junit.Test)

Example 12 with GlobalWindows

use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in project beam by apache.

the class SimpleDoFnRunnerTest method testOnTimerCalled.

/**
 * Checks that {@link SimpleDoFnRunner#onTimer} forwards a timer firing to the wrapped
 * {@link DoFn}.
 */
@Test
public void testOnTimerCalled() {
    WindowFn<?, GlobalWindow> globalWindowFn = new GlobalWindows();
    DoFnWithTimers<GlobalWindow> timerFn = new DoFnWithTimers(globalWindowFn.windowCoder());
    DoFnRunner<String, String> doFnRunner =
        new SimpleDoFnRunner<>(
            null,
            timerFn,
            NullSideInputReader.empty(),
            null,
            null,
            Collections.<TupleTag<?>>emptyList(),
            mockStepContext,
            WindowingStrategy.of(globalWindowFn));

    // Timestamp at which the timer fires: a base instant of 42 shifted by 37 milliseconds.
    Instant firingTime = new Instant(42).plus(Duration.millis(37));

    // Mocks do not combine well with annotation analysis, so the invocations that the
    // DoFn recorded in onTimerInvocations are inspected instead.
    doFnRunner.onTimer(
        DoFnWithTimers.TIMER_ID, GlobalWindow.INSTANCE, firingTime, TimeDomain.EVENT_TIME);

    assertThat(
        timerFn.onTimerInvocations,
        contains(
            TimerData.of(
                DoFnWithTimers.TIMER_ID,
                StateNamespaces.window(globalWindowFn.windowCoder(), GlobalWindow.INSTANCE),
                firingTime,
                TimeDomain.EVENT_TIME)));
}
Also used : GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) Instant(org.joda.time.Instant) Duration(org.joda.time.Duration) GlobalWindow(org.apache.beam.sdk.transforms.windowing.GlobalWindow) Test(org.junit.Test)

Example 13 with GlobalWindows

use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in project beam by apache.

the class SimpleDoFnRunnerTest method testBackwardsInTimeNoSkew.

/**
 * Shows that, when the {@link DoFn#getAllowedTimestampSkew() allowed timestamp skew} is zero,
 * emitting an element with a timestamp earlier than the current element's timestamp fails.
 */
@Test
public void testBackwardsInTimeNoSkew() {
    Duration allowedSkew = Duration.ZERO;
    Instant inputTimestamp = new Instant(0);

    SkewingDoFn skewingFn = new SkewingDoFn(allowedSkew);
    DoFnRunner<Duration, Duration> doFnRunner =
        new SimpleDoFnRunner<>(
            null,
            skewingFn,
            NullSideInputReader.empty(),
            new ListOutputManager(),
            new TupleTag<Duration>(),
            Collections.<TupleTag<?>>emptyList(),
            mockStepContext,
            WindowingStrategy.of(new GlobalWindows()));

    doFnRunner.startBundle();

    // A zero element is accepted: this first call is made before any exception is expected.
    doFnRunner.processElement(
        WindowedValue.timestampedValueInGlobalWindow(Duration.ZERO, inputTimestamp));

    // From here on, the next call must fail with a wrapped IllegalArgumentException.
    thrown.expect(UserCodeException.class);
    thrown.expectCause(isA(IllegalArgumentException.class));
    thrown.expectMessage("must be no earlier");
    thrown.expectMessage(
        String.format("timestamp of the current input (%s)", inputTimestamp.toString()));
    thrown.expectMessage(
        String.format(
            "the allowed skew (%s)", PeriodFormat.getDefault().print(allowedSkew.toPeriod())));

    // Output earlier than (current time - skew) is rejected.
    doFnRunner.processElement(
        WindowedValue.timestampedValueInGlobalWindow(Duration.millis(1L), inputTimestamp));
}
Also used : GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) Instant(org.joda.time.Instant) Duration(org.joda.time.Duration) Test(org.junit.Test)

Example 14 with GlobalWindows

use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in project beam by apache.

the class ViewTest method testWindowedSideInputFixedToGlobal.

/**
 * Reads a singleton side input that is windowed into the global window from a main input that
 * is windowed into 10 ms fixed windows, and asserts that every main-input element is paired
 * with the same side-input sum.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedSideInputFixedToGlobal() {
    // Side input: 1 + 2 + 3 summed globally and exposed as a singleton view.
    final PCollectionView<Integer> sumView =
        pipeline
            .apply(
                "CreateSideInput",
                Create.timestamped(
                    TimestampedValue.of(1, new Instant(1)),
                    TimestampedValue.of(2, new Instant(11)),
                    TimestampedValue.of(3, new Instant(13))))
            .apply("WindowSideInput", Window.<Integer>into(new GlobalWindows()))
            .apply(Sum.integersGlobally())
            .apply(View.<Integer>asSingleton());

    // Main input: three timestamped strings placed in fixed windows of 10 milliseconds.
    PCollection<String> joined =
        pipeline
            .apply(
                "CreateMainInput",
                Create.timestamped(
                    TimestampedValue.of("A", new Instant(4)),
                    TimestampedValue.of("B", new Instant(15)),
                    TimestampedValue.of("C", new Instant(7))))
            .apply("WindowMainInput", Window.<String>into(FixedWindows.of(Duration.millis(10))))
            .apply(
                "OutputMainAndSideInputs",
                ParDo.of(
                        new DoFn<String, String>() {

                            // Appends the side-input value to each main-input element.
                            @ProcessElement
                            public void processElement(ProcessContext context) {
                                context.output(context.element() + context.sideInput(sumView));
                            }
                        })
                    .withSideInputs(sumView));

    PAssert.that(joined).containsInAnyOrder("A6", "B6", "C6");
    pipeline.run();
}
Also used : GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) Instant(org.joda.time.Instant) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 15 with GlobalWindows

use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in project beam by apache.

the class StreamingWriteTables method expand.

/**
 * Expands this transform: shards the incoming rows, tags each with a unique id, reshuffles,
 * re-windows into the global window and hands the rows to {@code StreamingWriteFn}.
 *
 * @param input table rows keyed by their destination table
 * @return a {@link WriteResult} built from the pipeline, the failed-inserts tag and the
 *     collection of rows emitted under that tag
 */
@Override
public WriteResult expand(PCollection<KV<TableDestination, TableRow>> input) {
    // Streaming rows straight into BigQuery would be the naive approach, but it can
    // occasionally duplicate data, e.g. when the VM running this code restarts and the
    // code runs again. This implementation mitigates that risk by leaning on BigQuery's
    // built-in, best-effort de-duplication: every input TableRow is tagged with a generated
    // unique id that is passed to BigQuery and used to ignore duplicates.
    //
    // 50 keys are created per BigQuery table to generate output on. That is few enough to
    // get good batching into BigQuery's insert calls, and enough to max out the streaming
    // insert quota.
    PCollection<KV<ShardedKey<String>, TableRowInfo>> rowsWithUniqueIds =
        input
            .apply("ShardTableWrites", ParDo.of(new GenerateShardedTable(50)))
            .setCoder(
                KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowJsonCoder.of()))
            .apply("TagWithUniqueIds", ParDo.of(new TagWithUniqueIds()));

    // The same TableRow must not be processed more than once with regenerated, different
    // unique ids. That is achieved through "checkpointing", a side effect of having
    // StreamingWriteFn immediately follow a GBK, which Reshuffle performs.
    TupleTag<Void> mainOutputTag = new TupleTag<>("mainOutput");
    TupleTag<TableRow> failedInsertsTag = new TupleTag<>("failedInserts");
    PCollectionTuple writeOutputs =
        rowsWithUniqueIds
            .setCoder(
                KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowInfoCoder.of()))
            .apply(Reshuffle.<ShardedKey<String>, TableRowInfo>of())
            .apply(
                "GlobalWindow",
                Window.<KV<ShardedKey<String>, TableRowInfo>>into(new GlobalWindows())
                    .triggering(DefaultTrigger.of())
                    .discardingFiredPanes())
            .apply(
                "StreamingWrite",
                ParDo.of(new StreamingWriteFn(bigQueryServices, retryPolicy, failedInsertsTag))
                    .withOutputTags(mainOutputTag, TupleTagList.of(failedInsertsTag)));

    // Only the failed-inserts output is surfaced to the caller; it needs an explicit coder.
    PCollection<TableRow> failedInserts = writeOutputs.get(failedInsertsTag);
    failedInserts.setCoder(TableRowJsonCoder.of());
    return WriteResult.in(input.getPipeline(), failedInsertsTag, failedInserts);
}
Also used : GlobalWindows(org.apache.beam.sdk.transforms.windowing.GlobalWindows) TupleTag(org.apache.beam.sdk.values.TupleTag) KV(org.apache.beam.sdk.values.KV) TableRow(com.google.api.services.bigquery.model.TableRow) PCollectionTuple(org.apache.beam.sdk.values.PCollectionTuple)

Aggregations

GlobalWindows (org.apache.beam.sdk.transforms.windowing.GlobalWindows): 19; Test (org.junit.Test): 15; Instant (org.joda.time.Instant): 10; GlobalWindow (org.apache.beam.sdk.transforms.windowing.GlobalWindow): 8; Duration (org.joda.time.Duration): 6; KV (org.apache.beam.sdk.values.KV): 4; Pipeline (org.apache.beam.sdk.Pipeline): 3; Category (org.junit.experimental.categories.Category): 3; TableRow (com.google.api.services.bigquery.model.TableRow): 2; List (java.util.List): 2; WindowMatchers.isSingleWindowedValue (org.apache.beam.runners.core.WindowMatchers.isSingleWindowedValue): 2; WindowMatchers.isWindowedValue (org.apache.beam.runners.core.WindowMatchers.isWindowedValue): 2; WindowedValue (org.apache.beam.sdk.util.WindowedValue): 2; PCollectionTuple (org.apache.beam.sdk.values.PCollectionTuple): 2; PCollectionView (org.apache.beam.sdk.values.PCollectionView): 2; TupleTag (org.apache.beam.sdk.values.TupleTag): 2; Matchers.emptyIterable (org.hamcrest.Matchers.emptyIterable): 2; TableReference (com.google.api.services.bigquery.model.TableReference): 1; ImmutableList (com.google.common.collect.ImmutableList): 1; Map (java.util.Map): 1