use of org.apache.beam.sdk.state.TimerSpec in project beam by apache.
the class ParDoTest method testOutOfBoundsEventTimeTimer.
@Test
@Category({ ValidatesRunner.class, UsesTimersInParDo.class })
public void testOutOfBoundsEventTimeTimer() throws Exception {
final String timerId = "foo";
DoFn<KV<String, Integer>, Integer> fn = new DoFn<KV<String, Integer>, Integer>() {
@TimerId(timerId)
private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@ProcessElement
public void processElement(ProcessContext context, BoundedWindow window, @TimerId(timerId) Timer timer) {
timer.set(window.maxTimestamp().plus(1L));
}
@OnTimer(timerId)
public void onTimer(OnTimerContext context) {
}
};
PCollection<Integer> output = pipeline.apply(Create.of(KV.of("hello", 37))).apply(ParDo.of(fn));
thrown.expect(RuntimeException.class);
// Note that runners can reasonably vary their message - this matcher should be flexible
// and can be evolved.
thrown.expectMessage("event time timer");
thrown.expectMessage("expiration");
pipeline.run();
}
use of org.apache.beam.sdk.state.TimerSpec in project beam by apache.
the class ParDoTest method testEventTimeTimerMultipleKeys.
/**
* Tests that event time timers for multiple keys both fire. This particularly exercises
* implementations that may GC in ways not simply governed by the watermark.
*/
@Test
@Category({ ValidatesRunner.class, UsesTimersInParDo.class })
public void testEventTimeTimerMultipleKeys() throws Exception {
final String timerId = "foo";
final String stateId = "sizzle";
final int offset = 5000;
final int timerOutput = 4093;
DoFn<KV<String, Integer>, KV<String, Integer>> fn = new DoFn<KV<String, Integer>, KV<String, Integer>>() {
@TimerId(timerId)
private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@StateId(stateId)
private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());
@ProcessElement
public void processElement(ProcessContext context, @TimerId(timerId) Timer timer, @StateId(stateId) ValueState<String> state, BoundedWindow window) {
timer.set(window.maxTimestamp());
state.write(context.element().getKey());
context.output(KV.of(context.element().getKey(), context.element().getValue() + offset));
}
@OnTimer(timerId)
public void onTimer(OnTimerContext context, @StateId(stateId) ValueState<String> state) {
context.output(KV.of(state.read(), timerOutput));
}
};
// Enough keys that we exercise interesting code paths
int numKeys = 50;
List<KV<String, Integer>> input = new ArrayList<>();
List<KV<String, Integer>> expectedOutput = new ArrayList<>();
for (Integer key = 0; key < numKeys; ++key) {
// Each key should have just one final output at GC time
expectedOutput.add(KV.of(key.toString(), timerOutput));
for (int i = 0; i < 15; ++i) {
// Each input should be output with the offset added
input.add(KV.of(key.toString(), i));
expectedOutput.add(KV.of(key.toString(), i + offset));
}
}
Collections.shuffle(input);
PCollection<KV<String, Integer>> output = pipeline.apply(Create.of(input)).apply(ParDo.of(fn));
PAssert.that(output).containsInAnyOrder(expectedOutput);
pipeline.run();
}
use of org.apache.beam.sdk.state.TimerSpec in project beam by apache.
the class ParDoTest method testSimpleProcessingTimerTimer.
@Test
@Category({ NeedsRunner.class, UsesTimersInParDo.class, UsesTestStream.class })
public void testSimpleProcessingTimerTimer() throws Exception {
final String timerId = "foo";
DoFn<KV<String, Integer>, Integer> fn = new DoFn<KV<String, Integer>, Integer>() {
@TimerId(timerId)
private final TimerSpec spec = TimerSpecs.timer(TimeDomain.PROCESSING_TIME);
@ProcessElement
public void processElement(ProcessContext context, @TimerId(timerId) Timer timer) {
timer.offset(Duration.standardSeconds(1)).setRelative();
context.output(3);
}
@OnTimer(timerId)
public void onTimer(OnTimerContext context) {
context.output(42);
}
};
TestStream<KV<String, Integer>> stream = TestStream.create(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())).addElements(KV.of("hello", 37)).advanceProcessingTime(Duration.standardSeconds(2)).advanceWatermarkToInfinity();
PCollection<Integer> output = pipeline.apply(stream).apply(ParDo.of(fn));
PAssert.that(output).containsInAnyOrder(3, 42);
pipeline.run();
}
use of org.apache.beam.sdk.state.TimerSpec in project beam by apache.
the class DoFnOperatorTest method testStateGCForStatefulFn.
@Test
public void testStateGCForStatefulFn() throws Exception {
WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(new Duration(10))).withAllowedLateness(Duration.ZERO);
final String timerId = "boo";
final String stateId = "dazzle";
final int offset = 5000;
final int timerOutput = 4093;
DoFn<KV<String, Integer>, KV<String, Integer>> fn = new DoFn<KV<String, Integer>, KV<String, Integer>>() {
@TimerId(timerId)
private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@StateId(stateId)
private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());
@ProcessElement
public void processElement(ProcessContext context, @TimerId(timerId) Timer timer, @StateId(stateId) ValueState<String> state, BoundedWindow window) {
timer.set(window.maxTimestamp());
state.write(context.element().getKey());
context.output(KV.of(context.element().getKey(), context.element().getValue() + offset));
}
@OnTimer(timerId)
public void onTimer(OnTimerContext context, @StateId(stateId) ValueState<String> state) {
context.output(KV.of(state.read(), timerOutput));
}
};
WindowedValue.FullWindowedValueCoder<KV<String, Integer>> windowedValueCoder = WindowedValue.getFullCoder(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()), windowingStrategy.getWindowFn().windowCoder());
TupleTag<KV<String, Integer>> outputTag = new TupleTag<>("main-output");
DoFnOperator<KV<String, Integer>, KV<String, Integer>, WindowedValue<KV<String, Integer>>> doFnOperator = new DoFnOperator<>(fn, "stepName", windowedValueCoder, outputTag, Collections.<TupleTag<?>>emptyList(), new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<KV<String, Integer>>>(), windowingStrategy, new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
Collections.<PCollectionView<?>>emptyList(), /* side inputs */
PipelineOptionsFactory.as(FlinkPipelineOptions.class), StringUtf8Coder.of());
KeyedOneInputStreamOperatorTestHarness<String, WindowedValue<KV<String, Integer>>, WindowedValue<KV<String, Integer>>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, new KeySelector<WindowedValue<KV<String, Integer>>, String>() {
@Override
public String getKey(WindowedValue<KV<String, Integer>> kvWindowedValue) throws Exception {
return kvWindowedValue.getValue().getKey();
}
}, new CoderTypeInformation<>(StringUtf8Coder.of()));
testHarness.open();
testHarness.processWatermark(0);
assertEquals(0, testHarness.numKeyedStateEntries());
IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));
testHarness.processElement(new StreamRecord<>(WindowedValue.of(KV.of("key1", 5), new Instant(1), window1, PaneInfo.NO_FIRING)));
testHarness.processElement(new StreamRecord<>(WindowedValue.of(KV.of("key2", 7), new Instant(3), window1, PaneInfo.NO_FIRING)));
assertThat(this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of(KV.of("key1", 5 + offset), new Instant(1), window1, PaneInfo.NO_FIRING), WindowedValue.of(KV.of("key2", 7 + offset), new Instant(3), window1, PaneInfo.NO_FIRING)));
assertEquals(2, testHarness.numKeyedStateEntries());
testHarness.getOutput().clear();
// this should trigger both the window.maxTimestamp() timer and the GC timer
// this tests that the GC timer fires after the user timer
testHarness.processWatermark(window1.maxTimestamp().plus(windowingStrategy.getAllowedLateness()).plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS).getMillis());
assertThat(this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of(KV.of("key1", timerOutput), new Instant(9), window1, PaneInfo.NO_FIRING), WindowedValue.of(KV.of("key2", timerOutput), new Instant(9), window1, PaneInfo.NO_FIRING)));
// ensure the state was garbage collected
assertEquals(0, testHarness.numKeyedStateEntries());
testHarness.close();
}
use of org.apache.beam.sdk.state.TimerSpec in project beam by apache.
the class ProcessBundleDescriptorsTest method testLengthPrefixingOfKeyCoderInStatefulExecutableStage.
/**
* Tests that a stateful stage will wrap the key coder of a stateful transform in a
* LengthPrefixCoder.
*/
@Test
public void testLengthPrefixingOfKeyCoderInStatefulExecutableStage() throws Exception {
// Add another stateful stage with a non-standard key coder
Pipeline p = Pipeline.create();
Coder<Void> keycoder = VoidCoder.of();
assertThat(ModelCoderRegistrar.isKnownCoder(keycoder), is(false));
p.apply("impulse", Impulse.create()).apply("create", ParDo.of(new DoFn<byte[], KV<Void, String>>() {
@ProcessElement
public void process(ProcessContext ctxt) {
}
})).setCoder(KvCoder.of(keycoder, StringUtf8Coder.of())).apply("userState", ParDo.of(new DoFn<KV<Void, String>, KV<Void, String>>() {
@StateId("stateId")
private final StateSpec<BagState<String>> bufferState = StateSpecs.bag(StringUtf8Coder.of());
@TimerId("timerId")
private final TimerSpec timerSpec = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@ProcessElement
public void processElement(@Element KV<Void, String> element, @StateId("stateId") BagState<String> state, @TimerId("timerId") Timer timer, OutputReceiver<KV<Void, String>> r) {
}
@OnTimer("timerId")
public void onTimer() {
}
})).apply("gbk", GroupByKey.create());
RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineProto);
Optional<ExecutableStage> optionalStage = Iterables.tryFind(fused.getFusedStages(), (ExecutableStage stage) -> stage.getUserStates().stream().anyMatch(spec -> spec.localName().equals("stateId")));
checkState(optionalStage.isPresent(), "Expected a stage with user state.");
ExecutableStage stage = optionalStage.get();
PipelineNode.PCollectionNode inputPCollection = stage.getInputPCollection();
// Ensure original key coder is not a LengthPrefixCoder
Map<String, RunnerApi.Coder> stageCoderMap = stage.getComponents().getCodersMap();
RunnerApi.Coder originalMainInputCoder = stageCoderMap.get(inputPCollection.getPCollection().getCoderId());
String originalKeyCoderId = ModelCoders.getKvCoderComponents(originalMainInputCoder).keyCoderId();
RunnerApi.Coder originalKeyCoder = stageCoderMap.get(originalKeyCoderId);
assertThat(originalKeyCoder.getSpec().getUrn(), is(CoderTranslation.JAVA_SERIALIZED_CODER_URN));
// Now create ProcessBundleDescriptor and check for the LengthPrefixCoder around the key coder
BeamFnApi.ProcessBundleDescriptor pbd = ProcessBundleDescriptors.fromExecutableStage("test_stage", stage, Endpoints.ApiServiceDescriptor.getDefaultInstance()).getProcessBundleDescriptor();
Map<String, RunnerApi.Coder> pbsCoderMap = pbd.getCodersMap();
RunnerApi.Coder pbsMainInputCoder = pbsCoderMap.get(pbd.getPcollectionsOrThrow(inputPCollection.getId()).getCoderId());
String keyCoderId = ModelCoders.getKvCoderComponents(pbsMainInputCoder).keyCoderId();
RunnerApi.Coder keyCoder = pbsCoderMap.get(keyCoderId);
ensureLengthPrefixed(keyCoder, originalKeyCoder, pbsCoderMap);
TimerReference timerRef = Iterables.getOnlyElement(stage.getTimers());
String timerTransformId = timerRef.transform().getId();
RunnerApi.ParDoPayload parDoPayload = RunnerApi.ParDoPayload.parseFrom(pbd.getTransformsOrThrow(timerTransformId).getSpec().getPayload());
RunnerApi.TimerFamilySpec timerSpec = parDoPayload.getTimerFamilySpecsOrThrow(timerRef.localName());
RunnerApi.Coder timerCoder = pbsCoderMap.get(timerSpec.getTimerFamilyCoderId());
String timerKeyCoderId = timerCoder.getComponentCoderIds(0);
RunnerApi.Coder timerKeyCoder = pbsCoderMap.get(timerKeyCoderId);
ensureLengthPrefixed(timerKeyCoder, originalKeyCoder, pbsCoderMap);
}
Aggregations