use of org.apache.beam.sdk.state.TimerSpec in project beam by apache.
the class DoFnOperatorTest method testWatermarkContract.
/**
* This test specifically verifies that we correctly map Flink watermarks to Beam watermarks. In
* Beam, a watermark {@code T} guarantees there will not be elements with a timestamp {@code < T}
* in the future. In Flink, a watermark {@code T} guarantees there will not be elements with a
* timestamp {@code <= T} in the future. We have to make sure to take this into account when
* firing timers.
*
* <p>This does not test the timer API in general or processing-time timers because there are
* generic tests for this in {@code ParDoTest}.
*/
@Test
public void testWatermarkContract() throws Exception {
final Instant timerTimestamp = new Instant(1000);
final Instant timerOutputTimestamp = timerTimestamp.minus(Duration.millis(1));
final String eventTimeMessage = "Event timer fired: ";
final String processingTimeMessage = "Processing timer fired";
WindowingStrategy<Object, IntervalWindow> windowingStrategy = WindowingStrategy.of(FixedWindows.of(Duration.millis(10_000)));
final String eventTimerId = "eventTimer";
final String eventTimerId2 = "eventTimer2";
final String processingTimerId = "processingTimer";
DoFn<Integer, String> fn = new DoFn<Integer, String>() {
@TimerId(eventTimerId)
private final TimerSpec eventTimer = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@TimerId(eventTimerId2)
private final TimerSpec eventTimer2 = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@TimerId(processingTimerId)
private final TimerSpec processingTimer = TimerSpecs.timer(TimeDomain.PROCESSING_TIME);
@ProcessElement
public void processElement(ProcessContext context, @TimerId(eventTimerId) Timer eventTimer, @TimerId(eventTimerId2) Timer eventTimerWithOutputTimestamp, @TimerId(processingTimerId) Timer processingTimer) {
eventTimer.set(timerTimestamp);
eventTimerWithOutputTimestamp.withOutputTimestamp(timerOutputTimestamp).set(timerTimestamp);
processingTimer.offset(Duration.millis(timerTimestamp.getMillis())).setRelative();
}
@OnTimer(eventTimerId)
public void onEventTime(OnTimerContext context) {
assertEquals("Timer timestamp must match set timestamp.", timerTimestamp, context.timestamp());
context.outputWithTimestamp(eventTimeMessage + eventTimerId, context.timestamp());
}
@OnTimer(eventTimerId2)
public void onEventTime2(OnTimerContext context) {
assertEquals("Timer timestamp must match set timestamp.", timerTimestamp, context.fireTimestamp());
context.output(eventTimeMessage + eventTimerId2);
}
@OnTimer(processingTimerId)
public void onProcessingTime(OnTimerContext context) {
assertEquals(// See SimpleDoFnRunner#onTimer
"Timer timestamp must match current input watermark", timerTimestamp.plus(Duration.millis(1)), context.timestamp());
context.outputWithTimestamp(processingTimeMessage, context.timestamp());
}
};
VarIntCoder keyCoder = VarIntCoder.of();
WindowedValue.FullWindowedValueCoder<Integer> inputCoder = WindowedValue.getFullCoder(keyCoder, windowingStrategy.getWindowFn().windowCoder());
WindowedValue.FullWindowedValueCoder<String> outputCoder = WindowedValue.getFullCoder(StringUtf8Coder.of(), windowingStrategy.getWindowFn().windowCoder());
KeySelector<WindowedValue<Integer>, ByteBuffer> keySelector = e -> FlinkKeyUtils.encodeKey(e.getValue(), keyCoder);
TupleTag<String> outputTag = new TupleTag<>("main-output");
DoFnOperator<Integer, String> doFnOperator = new DoFnOperator<>(fn, "stepName", inputCoder, Collections.emptyMap(), outputTag, Collections.emptyList(), new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, outputCoder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())), windowingStrategy, new HashMap<>(), /* side-input mapping */
Collections.emptyList(), /* side inputs */
FlinkPipelineOptions.defaults(), keyCoder, /* key coder */
keySelector, DoFnSchemaInformation.create(), Collections.emptyMap());
OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness = new KeyedOneInputStreamOperatorTestHarness<>(doFnOperator, keySelector, new CoderTypeInformation<>(FlinkKeyUtils.ByteBufferCoder.of(), FlinkPipelineOptions.defaults()));
testHarness.setup(new CoderTypeSerializer<>(outputCoder, new SerializablePipelineOptions(FlinkPipelineOptions.defaults())));
testHarness.open();
testHarness.processWatermark(0);
testHarness.setProcessingTime(0);
IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10_000));
// this should register the two timers above
testHarness.processElement(new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());
// this does not yet fire the timers (in vanilla Flink it would)
testHarness.processWatermark(timerTimestamp.getMillis());
testHarness.setProcessingTime(timerTimestamp.getMillis());
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), emptyIterable());
assertThat(doFnOperator.keyedStateInternals.minWatermarkHoldMs(), is(timerOutputTimestamp.getMillis()));
// this must fire the event timers
testHarness.processWatermark(timerTimestamp.getMillis() + 1);
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), containsInAnyOrder(WindowedValue.of(eventTimeMessage + eventTimerId, timerTimestamp, window1, PaneInfo.NO_FIRING), WindowedValue.of(eventTimeMessage + eventTimerId2, timerTimestamp.minus(Duration.millis(1)), window1, PaneInfo.NO_FIRING)));
testHarness.getOutput().clear();
// this must fire the processing timer
testHarness.setProcessingTime(timerTimestamp.getMillis() + 1);
assertThat(stripStreamRecordFromWindowedValue(testHarness.getOutput()), contains(WindowedValue.of(// See SimpleDoFnRunner#onTimer
processingTimeMessage, timerTimestamp.plus(Duration.millis(1)), window1, PaneInfo.NO_FIRING)));
testHarness.close();
}
use of org.apache.beam.sdk.state.TimerSpec in project beam by apache.
the class PortableTimersExecutionTest method testTimerExecution.
@Test(timeout = 120_000)
public void testTimerExecution() throws Exception {
FlinkPipelineOptions options = PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api").as(FlinkPipelineOptions.class);
options.setRunner(CrashingRunner.class);
options.setFlinkMaster("[local]");
options.setStreaming(isStreaming);
options.setParallelism(2);
options.as(PortablePipelineOptions.class).setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
final String timerId = "foo";
final String stateId = "sizzle";
final int offset = 5000;
final int timerOutput = 4093;
// Enough keys that we exercise interesting code paths
int numKeys = 50;
int numDuplicateTimers = 15;
List<KV<String, Integer>> input = new ArrayList<>();
List<KV<String, Integer>> expectedOutput = new ArrayList<>();
for (Integer key = 0; key < numKeys; ++key) {
// Each key should have just one final output at GC time
expectedOutput.add(KV.of(key.toString(), timerOutput));
for (int i = 0; i < numDuplicateTimers; ++i) {
// Each input should be output with the offset added
input.add(KV.of(key.toString(), i));
expectedOutput.add(KV.of(key.toString(), i + offset));
}
}
Collections.shuffle(input);
DoFn<byte[], KV<String, Integer>> inputFn = new DoFn<byte[], KV<String, Integer>>() {
@ProcessElement
public void processElement(ProcessContext context) {
for (KV<String, Integer> stringIntegerKV : input) {
context.output(stringIntegerKV);
}
}
};
DoFn<KV<String, Integer>, KV<String, Integer>> testFn = new DoFn<KV<String, Integer>, KV<String, Integer>>() {
@TimerId(timerId)
private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@StateId(stateId)
private final StateSpec<ValueState<String>> stateSpec = StateSpecs.value(StringUtf8Coder.of());
@ProcessElement
public void processElement(ProcessContext context, @TimerId(timerId) Timer timer, @StateId(stateId) ValueState<String> state, BoundedWindow window) {
timer.set(window.maxTimestamp());
state.write(context.element().getKey());
context.output(KV.of(context.element().getKey(), context.element().getValue() + offset));
}
@OnTimer(timerId)
public void onTimer(@StateId(stateId) ValueState<String> state, OutputReceiver<KV<String, Integer>> r) {
String read = Objects.requireNonNull(state.read(), "State must not be null");
KV<String, Integer> of = KV.of(read, timerOutput);
r.output(of);
}
};
final Pipeline pipeline = Pipeline.create(options);
PCollection<KV<String, Integer>> output = pipeline.apply("Impulse", Impulse.create()).apply("Input", ParDo.of(inputFn)).apply("Timers", ParDo.of(testFn));
PAssert.that(output).containsInAnyOrder(expectedOutput);
RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
JobInvocation jobInvocation = FlinkJobInvoker.create(null).createJobInvocation("id", "none", flinkJobExecutor, pipelineProto, options, new FlinkPipelineRunner(options, null, Collections.emptyList()));
jobInvocation.start();
while (jobInvocation.getState() != JobState.Enum.DONE) {
Thread.sleep(1000);
}
assertThat(jobInvocation.getState(), is(JobState.Enum.DONE));
}
use of org.apache.beam.sdk.state.TimerSpec in project beam by apache.
the class RemoteExecutionTest method testExecutionWithTimer.
@Test
public void testExecutionWithTimer() throws Exception {
launchSdkHarness(PipelineOptionsFactory.create());
Pipeline p = Pipeline.create();
p.apply("impulse", Impulse.create()).apply("create", ParDo.of(new DoFn<byte[], KV<String, String>>() {
@ProcessElement
public void process(ProcessContext ctxt) {
}
})).setCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())).apply("timer", ParDo.of(new DoFn<KV<String, String>, KV<String, String>>() {
@TimerId("event")
private final TimerSpec eventTimerSpec = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@TimerId("processing")
private final TimerSpec processingTimerSpec = TimerSpecs.timer(TimeDomain.PROCESSING_TIME);
@ProcessElement
public void processElement(ProcessContext context, @TimerId("event") Timer eventTimeTimer, @TimerId("processing") Timer processingTimeTimer) {
context.output(KV.of("main" + context.element().getKey(), ""));
eventTimeTimer.withOutputTimestamp(context.timestamp()).set(context.timestamp().plus(Duration.millis(1L)));
processingTimeTimer.offset(Duration.millis(2L));
processingTimeTimer.setRelative();
}
@OnTimer("event")
public void eventTimer(OnTimerContext context, @Key String key, @TimerId("event") Timer eventTimeTimer, @TimerId("processing") Timer processingTimeTimer) {
context.output(KV.of("event", key));
eventTimeTimer.withOutputTimestamp(context.timestamp()).set(context.fireTimestamp().plus(Duration.millis(11L)));
processingTimeTimer.offset(Duration.millis(12L));
processingTimeTimer.setRelative();
}
@OnTimer("processing")
public void processingTimer(OnTimerContext context, @Key String key, @TimerId("event") Timer eventTimeTimer, @TimerId("processing") Timer processingTimeTimer) {
context.output(KV.of("processing", key));
eventTimeTimer.withOutputTimestamp(context.timestamp()).set(context.fireTimestamp().plus(Duration.millis(21L)));
processingTimeTimer.offset(Duration.millis(22L));
processingTimeTimer.setRelative();
}
@OnWindowExpiration
public void onWindowExpiration(@Key String key, OutputReceiver<KV<String, String>> outputReceiver) {
outputReceiver.output(KV.of("onWindowExpiration", key));
}
})).apply("gbk", GroupByKey.create());
RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p);
FusedPipeline fused = GreedyPipelineFuser.fuse(pipelineProto);
Optional<ExecutableStage> optionalStage = Iterables.tryFind(fused.getFusedStages(), (ExecutableStage stage) -> !stage.getTimers().isEmpty());
checkState(optionalStage.isPresent(), "Expected a stage with timers.");
ExecutableStage stage = optionalStage.get();
ExecutableProcessBundleDescriptor descriptor = ProcessBundleDescriptors.fromExecutableStage("test_stage", stage, dataServer.getApiServiceDescriptor(), stateServer.getApiServiceDescriptor());
BundleProcessor processor = controlClient.getProcessor(descriptor.getProcessBundleDescriptor(), descriptor.getRemoteInputDestinations(), stateDelegator, descriptor.getTimerSpecs());
Map<String, Collection<WindowedValue<?>>> outputValues = new HashMap<>();
Map<String, RemoteOutputReceiver<?>> outputReceivers = new HashMap<>();
for (Entry<String, Coder> remoteOutputCoder : descriptor.getRemoteOutputCoders().entrySet()) {
List<WindowedValue<?>> outputContents = Collections.synchronizedList(new ArrayList<>());
outputValues.put(remoteOutputCoder.getKey(), outputContents);
outputReceivers.put(remoteOutputCoder.getKey(), RemoteOutputReceiver.of((Coder<WindowedValue<?>>) remoteOutputCoder.getValue(), outputContents::add));
}
Map<KV<String, String>, Collection<org.apache.beam.runners.core.construction.Timer<?>>> timerValues = new HashMap<>();
Map<KV<String, String>, RemoteOutputReceiver<org.apache.beam.runners.core.construction.Timer<?>>> timerReceivers = new HashMap<>();
for (Map.Entry<String, Map<String, ProcessBundleDescriptors.TimerSpec>> transformTimerSpecs : descriptor.getTimerSpecs().entrySet()) {
for (ProcessBundleDescriptors.TimerSpec timerSpec : transformTimerSpecs.getValue().values()) {
KV<String, String> key = KV.of(timerSpec.transformId(), timerSpec.timerId());
List<org.apache.beam.runners.core.construction.Timer<?>> outputContents = Collections.synchronizedList(new ArrayList<>());
timerValues.put(key, outputContents);
timerReceivers.put(key, RemoteOutputReceiver.of((Coder<org.apache.beam.runners.core.construction.Timer<?>>) timerSpec.coder(), outputContents::add));
}
}
ProcessBundleDescriptors.TimerSpec eventTimerSpec = null;
ProcessBundleDescriptors.TimerSpec processingTimerSpec = null;
ProcessBundleDescriptors.TimerSpec onWindowExpirationSpec = null;
for (Map<String, ProcessBundleDescriptors.TimerSpec> timerSpecs : descriptor.getTimerSpecs().values()) {
for (ProcessBundleDescriptors.TimerSpec timerSpec : timerSpecs.values()) {
if ("onWindowExpiration0".equals(timerSpec.timerId())) {
onWindowExpirationSpec = timerSpec;
} else if (TimeDomain.EVENT_TIME.equals(timerSpec.getTimerSpec().getTimeDomain())) {
eventTimerSpec = timerSpec;
} else if (TimeDomain.PROCESSING_TIME.equals(timerSpec.getTimerSpec().getTimeDomain())) {
processingTimerSpec = timerSpec;
} else {
fail(String.format("Unknown timer specification %s", timerSpec));
}
}
}
// Set the current system time to a fixed value to get stable values for processing time timer
// output.
DateTimeUtils.setCurrentMillisFixed(BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis() + 10000L);
try {
try (RemoteBundle bundle = processor.newBundle(outputReceivers, timerReceivers, StateRequestHandler.unsupported(), BundleProgressHandler.ignored(), null, null)) {
Iterables.getOnlyElement(bundle.getInputReceivers().values()).accept(valueInGlobalWindow(KV.of("X", "X")));
bundle.getTimerReceivers().get(KV.of(eventTimerSpec.transformId(), eventTimerSpec.timerId())).accept(timerForTest("Y", 1000L, 100L));
bundle.getTimerReceivers().get(KV.of(processingTimerSpec.transformId(), processingTimerSpec.timerId())).accept(timerForTest("Z", 2000L, 200L));
bundle.getTimerReceivers().get(KV.of(onWindowExpirationSpec.transformId(), onWindowExpirationSpec.timerId())).accept(timerForTest("key", 5001L, 5000L));
}
String mainOutputTransform = Iterables.getOnlyElement(descriptor.getRemoteOutputCoders().keySet());
assertThat(outputValues.get(mainOutputTransform), containsInAnyOrder(valueInGlobalWindow(KV.of("mainX", "")), WindowedValue.timestampedValueInGlobalWindow(KV.of("event", "Y"), BoundedWindow.TIMESTAMP_MIN_VALUE.plus(Duration.millis(100L))), WindowedValue.timestampedValueInGlobalWindow(KV.of("processing", "Z"), BoundedWindow.TIMESTAMP_MIN_VALUE.plus(Duration.millis(200L))), WindowedValue.timestampedValueInGlobalWindow(KV.of("onWindowExpiration", "key"), BoundedWindow.TIMESTAMP_MIN_VALUE.plus(Duration.millis(5000L)))));
assertThat(timerValues.get(KV.of(eventTimerSpec.transformId(), eventTimerSpec.timerId())), containsInAnyOrder(timerForTest("X", 1L, 0L), timerForTest("Y", 1011L, 100L), timerForTest("Z", 2021L, 200L)));
assertThat(timerValues.get(KV.of(processingTimerSpec.transformId(), processingTimerSpec.timerId())), containsInAnyOrder(timerForTest("X", 10002L, 0L), timerForTest("Y", 10012L, 100L), timerForTest("Z", 10022L, 200L)));
} finally {
DateTimeUtils.setCurrentMillisSystem();
}
}
use of org.apache.beam.sdk.state.TimerSpec in project beam by apache.
the class DoFnSignaturesTest method testSimpleTimerWithContext.
@Test
public void testSimpleTimerWithContext() throws Exception {
DoFnSignature sig = DoFnSignatures.getSignature(new DoFn<KV<String, Integer>, Long>() {
@TimerId("foo")
private final TimerSpec bizzle = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@ProcessElement
public void foo(ProcessContext context) {
}
@OnTimer("foo")
public void onFoo(OnTimerContext c) {
}
}.getClass());
final String timerDeclarationId = TimerDeclaration.PREFIX + "foo";
assertThat(sig.timerDeclarations().size(), equalTo(1));
DoFnSignature.TimerDeclaration decl = sig.timerDeclarations().get(timerDeclarationId);
assertThat(decl.id(), equalTo(timerDeclarationId));
assertThat(decl.field().getName(), equalTo("bizzle"));
assertThat(sig.onTimerMethods().get(timerDeclarationId).extraParameters().get(0), equalTo((Parameter) Parameter.onTimerContext()));
}
use of org.apache.beam.sdk.state.TimerSpec in project beam by apache.
the class DoFnSignaturesTest method testAllParamsOnTimer.
@Test
public void testAllParamsOnTimer() throws Exception {
final String timerId = "some-timer-id";
final String timerDeclarationId = TimerDeclaration.PREFIX + timerId;
DoFnSignature sig = DoFnSignatures.getSignature(new DoFn<String, String>() {
@TimerId(timerId)
private final TimerSpec myfield1 = TimerSpecs.timer(TimeDomain.EVENT_TIME);
@ProcessElement
public void process(ProcessContext c) {
}
@OnTimer(timerId)
public void onTimer(@Timestamp Instant timestamp, TimeDomain timeDomain, BoundedWindow w) {
}
}.getClass());
assertThat(sig.onTimerMethods().get(timerDeclarationId).extraParameters().size(), equalTo(3));
assertThat(sig.onTimerMethods().get(timerDeclarationId).extraParameters().get(0), instanceOf(TimestampParameter.class));
assertThat(sig.onTimerMethods().get(timerDeclarationId).extraParameters().get(1), instanceOf(TimeDomainParameter.class));
assertThat(sig.onTimerMethods().get(timerDeclarationId).extraParameters().get(2), instanceOf(WindowParameter.class));
}
Aggregations