Use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in the Apache Beam project.
Class SimpleDoFnRunnerTest, method testTimerSet.
/**
 * Tests that a user's call to set a timer gets properly dispatched to the timer internals. From
 * there on, it is the duty of the runner and step context to set it in whatever way is right for
 * that runner.
 */
@Test
public void testTimerSet() {
  // Typed on GlobalWindow (as in testOnTimerCalled) so that one WindowFn can supply the window
  // coder for the DoFn, the windowing strategy, and the expected state namespace below.
  WindowFn<?, GlobalWindow> windowFn = new GlobalWindows();
  DoFnWithTimers<GlobalWindow> fn = new DoFnWithTimers<>(windowFn.windowCoder());
  DoFnRunner<String, String> runner =
      new SimpleDoFnRunner<>(
          null,
          fn,
          NullSideInputReader.empty(),
          null,
          null,
          Collections.<TupleTag<?>>emptyList(),
          mockStepContext,
          WindowingStrategy.of(windowFn));

  // Setting the timer needs the current time, as it is set relative
  Instant currentTime = new Instant(42);
  when(mockTimerInternals.currentInputWatermarkTime()).thenReturn(currentTime);

  runner.processElement(WindowedValue.valueInGlobalWindow("anyValue"));

  // The timer is expected at the mocked watermark time plus the DoFn's declared offset.
  verify(mockTimerInternals)
      .setTimer(
          StateNamespaces.window(windowFn.windowCoder(), GlobalWindow.INSTANCE),
          DoFnWithTimers.TIMER_ID,
          currentTime.plus(DoFnWithTimers.TIMER_OFFSET),
          TimeDomain.EVENT_TIME);
}
Use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in the Apache Beam project.
Class SimpleDoFnRunnerTest, method testOnTimerCalled.
/**
 * Tests that {@link SimpleDoFnRunner#onTimer} properly dispatches to the underlying
 * {@link DoFn}.
 */
@Test
public void testOnTimerCalled() {
  WindowFn<?, GlobalWindow> windowFn = new GlobalWindows();
  // Diamond instead of a raw type, so the window coder's type is checked by the compiler.
  DoFnWithTimers<GlobalWindow> fn = new DoFnWithTimers<>(windowFn.windowCoder());
  DoFnRunner<String, String> runner =
      new SimpleDoFnRunner<>(
          null,
          fn,
          NullSideInputReader.empty(),
          null,
          null,
          Collections.<TupleTag<?>>emptyList(),
          mockStepContext,
          WindowingStrategy.of(windowFn));

  Instant currentTime = new Instant(42);
  Duration offset = Duration.millis(37);
  // The same timestamp is used both to fire the timer and to build the expected record.
  Instant firingTimestamp = currentTime.plus(offset);

  // Mocking is not easily compatible with annotation analysis, so we manually record
  // the method call.
  runner.onTimer(
      DoFnWithTimers.TIMER_ID, GlobalWindow.INSTANCE, firingTimestamp, TimeDomain.EVENT_TIME);

  assertThat(
      fn.onTimerInvocations,
      contains(
          TimerData.of(
              DoFnWithTimers.TIMER_ID,
              StateNamespaces.window(windowFn.windowCoder(), GlobalWindow.INSTANCE),
              firingTimestamp,
              TimeDomain.EVENT_TIME)));
}
Use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in the Apache Beam project.
Class SimpleDoFnRunnerTest, method testBackwardsInTimeNoSkew.
/**
 * Shows that, when the {@link DoFn#getAllowedTimestampSkew() allowed timestamp skew} is zero,
 * trying to output an element with a timestamp earlier than that of the current input fails.
 */
@Test
public void testBackwardsInTimeNoSkew() {
  Duration allowedSkew = Duration.ZERO;
  Instant inputTimestamp = new Instant(0);

  // Fragments the failure message is expected to contain.
  String expectedTimestampFragment =
      String.format("timestamp of the current input (%s)", inputTimestamp.toString());
  String expectedSkewFragment =
      String.format(
          "the allowed skew (%s)", PeriodFormat.getDefault().print(allowedSkew.toPeriod()));

  DoFnRunner<Duration, Duration> runner =
      new SimpleDoFnRunner<>(
          null,
          new SkewingDoFn(allowedSkew),
          NullSideInputReader.empty(),
          new ListOutputManager(),
          new TupleTag<Duration>(),
          Collections.<TupleTag<?>>emptyList(),
          mockStepContext,
          WindowingStrategy.of(new GlobalWindows()));
  runner.startBundle();

  // Outputting at exactly the current timestamp must succeed; the expectations are registered
  // only afterwards so that a failure here is not mistaken for the expected one.
  runner.processElement(
      WindowedValue.timestampedValueInGlobalWindow(Duration.ZERO, inputTimestamp));

  thrown.expect(UserCodeException.class);
  thrown.expectCause(isA(IllegalArgumentException.class));
  thrown.expectMessage("must be no earlier");
  thrown.expectMessage(expectedTimestampFragment);
  thrown.expectMessage(expectedSkewFragment);

  // Outputting earlier than (current time - skew) must be rejected.
  runner.processElement(
      WindowedValue.timestampedValueInGlobalWindow(Duration.millis(1L), inputTimestamp));
}
Use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in the Apache Beam project.
Class ViewTest, method testWindowedSideInputFixedToGlobal.
/**
 * Checks that a main input placed in fixed 10 ms windows can read a singleton side input that
 * was computed in the global window: every main element is expected to be emitted with the
 * global sum of the side-input values (1 + 2 + 3 = 6) appended.
 */
@Test
@Category(ValidatesRunner.class)
public void testWindowedSideInputFixedToGlobal() {
  // Side input: three timestamped integers, globally windowed, summed into a singleton view.
  PCollection<Integer> sideInputValues =
      pipeline.apply(
          "CreateSideInput",
          Create.timestamped(
              TimestampedValue.of(1, new Instant(1)),
              TimestampedValue.of(2, new Instant(11)),
              TimestampedValue.of(3, new Instant(13))));
  PCollection<Integer> globallyWindowedValues =
      sideInputValues.apply("WindowSideInput", Window.<Integer>into(new GlobalWindows()));
  final PCollectionView<Integer> view =
      globallyWindowedValues
          .apply(Sum.integersGlobally())
          .apply(View.<Integer>asSingleton());

  // Main input: three timestamped strings assigned to fixed windows of 10 ms.
  PCollection<String> mainInput =
      pipeline.apply(
          "CreateMainInput",
          Create.timestamped(
              TimestampedValue.of("A", new Instant(4)),
              TimestampedValue.of("B", new Instant(15)),
              TimestampedValue.of("C", new Instant(7))));
  PCollection<String> windowedMainInput =
      mainInput.apply("WindowMainInput", Window.<String>into(FixedWindows.of(Duration.millis(10))));

  // Append the side-input value to each main-input element.
  PCollection<String> output =
      windowedMainInput.apply(
          "OutputMainAndSideInputs",
          ParDo.of(
                  new DoFn<String, String>() {
                    @ProcessElement
                    public void processElement(ProcessContext c) {
                      c.output(c.element() + c.sideInput(view));
                    }
                  })
              .withSideInputs(view));

  PAssert.that(output).containsInAnyOrder("A6", "B6", "C6");
  pipeline.run();
}
Use of org.apache.beam.sdk.transforms.windowing.GlobalWindows in the Apache Beam project.
Class StreamingWriteTables, method expand.
/**
 * Builds the streaming-write sub-pipeline: rows are sharded per table, tagged with unique ids,
 * reshuffled, re-windowed into the global window and handed to {@code StreamingWriteFn}.
 *
 * @param input destination/row pairs to be written
 * @return a {@link WriteResult} created from the input's pipeline, the failed-inserts tag and
 *     the failed-inserts collection
 */
@Override
public WriteResult expand(PCollection<KV<TableDestination, TableRow>> input) {
  // Streaming rows straight to BigQuery would be the naive approach, but it can occasionally
  // duplicate data, e.g. when a VM running this code is restarted and the code is re-run.
  // That risk is mitigated here by relying on BigQuery's built-in best-effort de-dup
  // mechanism: each input TableRow is tagged with a generated unique id, which is passed to
  // BigQuery and used to ignore duplicates.
  //
  // We create 50 keys per BigQuery table to generate output on. This is few enough that we
  // get good batching into BigQuery's insert calls, and enough that we can max out the
  // streaming insert quota.
  PCollection<KV<ShardedKey<String>, TableRowInfo>> rowsWithUniqueIds =
      input
          .apply("ShardTableWrites", ParDo.of(new GenerateShardedTable(50)))
          .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowJsonCoder.of()))
          .apply("TagWithUniqueIds", ParDo.of(new TagWithUniqueIds()));

  TupleTag<Void> mainOutputTag = new TupleTag<>("mainOutput");
  TupleTag<TableRow> failedInsertsTag = new TupleTag<>("failedInserts");

  // To keep the same TableRow from being processed more than once with regenerated, different
  // unique ids, this implementation relies on "checkpointing", which is achieved as a side
  // effect of having StreamingWriteFn immediately follow a GBK, performed by Reshuffle.
  PCollection<KV<ShardedKey<String>, TableRowInfo>> reshuffledRows =
      rowsWithUniqueIds
          .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowInfoCoder.of()))
          .apply(Reshuffle.<ShardedKey<String>, TableRowInfo>of());

  PCollection<KV<ShardedKey<String>, TableRowInfo>> globallyWindowedRows =
      reshuffledRows.apply(
          "GlobalWindow",
          Window.<KV<ShardedKey<String>, TableRowInfo>>into(new GlobalWindows())
              .triggering(DefaultTrigger.of())
              .discardingFiredPanes());

  PCollectionTuple writeOutputs =
      globallyWindowedRows.apply(
          "StreamingWrite",
          ParDo.of(new StreamingWriteFn(bigQueryServices, retryPolicy, failedInsertsTag))
              .withOutputTags(mainOutputTag, TupleTagList.of(failedInsertsTag)));

  // Rows that could not be inserted are exposed to the caller through the WriteResult.
  PCollection<TableRow> failedRows =
      writeOutputs.get(failedInsertsTag).setCoder(TableRowJsonCoder.of());
  return WriteResult.in(input.getPipeline(), failedInsertsTag, failedRows);
}
Aggregations